summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r--src/lib/libcrypto/bn/arch/aarch64/bn_arch.h369
-rw-r--r--src/lib/libcrypto/bn/arch/alpha/bn_arch.h44
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_add.S165
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S155
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S138
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul.S167
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S157
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S244
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S197
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S145
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S242
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sub.S153
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bn_arch.c131
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bn_arch.h109
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/word_clz.S60
-rw-r--r--src/lib/libcrypto/bn/arch/arm/bn_arch.h73
-rw-r--r--src/lib/libcrypto/bn/arch/hppa/bn_arch.h24
-rw-r--r--src/lib/libcrypto/bn/arch/i386/bn_arch.h86
-rw-r--r--src/lib/libcrypto/bn/arch/m88k/bn_arch.h24
-rw-r--r--src/lib/libcrypto/bn/arch/mips64/bn_arch.h40
-rw-r--r--src/lib/libcrypto/bn/arch/powerpc/bn_arch.h39
-rw-r--r--src/lib/libcrypto/bn/arch/powerpc64/bn_arch.h44
-rw-r--r--src/lib/libcrypto/bn/arch/riscv64/bn_arch.h86
-rw-r--r--src/lib/libcrypto/bn/arch/sh/bn_arch.h24
-rw-r--r--src/lib/libcrypto/bn/arch/sparc64/bn_arch.h24
-rw-r--r--src/lib/libcrypto/bn/asm/alpha-mont.pl315
-rw-r--r--src/lib/libcrypto/bn/asm/armv4-mont.pl204
-rw-r--r--src/lib/libcrypto/bn/asm/bn-586.pl567
-rw-r--r--src/lib/libcrypto/bn/asm/co-586.pl287
-rw-r--r--src/lib/libcrypto/bn/asm/mips-mont.pl426
-rw-r--r--src/lib/libcrypto/bn/asm/mips.pl2234
-rw-r--r--src/lib/libcrypto/bn/asm/modexp512-x86_64.pl1393
-rw-r--r--src/lib/libcrypto/bn/asm/parisc-mont.pl985
-rw-r--r--src/lib/libcrypto/bn/asm/ppc-mont.pl329
-rw-r--r--src/lib/libcrypto/bn/asm/ppc.pl1968
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/x86-mont.pl592
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/x86_64-mont.pl1503
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/x86_64-mont5.pl1192
-rw-r--r--src/lib/libcrypto/bn/bn.h520
-rw-r--r--src/lib/libcrypto/bn/bn_add.c341
-rw-r--r--src/lib/libcrypto/bn/bn_bpsw.c531
-rw-r--r--src/lib/libcrypto/bn/bn_const.c433
-rw-r--r--src/lib/libcrypto/bn/bn_convert.c757
-rw-r--r--src/lib/libcrypto/bn/bn_ctx.c161
-rw-r--r--src/lib/libcrypto/bn/bn_div.c458
-rw-r--r--src/lib/libcrypto/bn/bn_err.c110
-rw-r--r--src/lib/libcrypto/bn/bn_exp.c1330
-rw-r--r--src/lib/libcrypto/bn/bn_gcd.c818
-rw-r--r--src/lib/libcrypto/bn/bn_internal.h568
-rw-r--r--src/lib/libcrypto/bn/bn_isqrt.c234
-rw-r--r--src/lib/libcrypto/bn/bn_kron.c195
-rw-r--r--src/lib/libcrypto/bn/bn_lib.c752
-rw-r--r--src/lib/libcrypto/bn/bn_local.h335
-rw-r--r--src/lib/libcrypto/bn/bn_mod.c369
-rw-r--r--src/lib/libcrypto/bn/bn_mod_sqrt.c723
-rw-r--r--src/lib/libcrypto/bn/bn_mont.c621
-rw-r--r--src/lib/libcrypto/bn/bn_mul.c370
-rw-r--r--src/lib/libcrypto/bn/bn_prime.c423
-rw-r--r--src/lib/libcrypto/bn/bn_prime.h14
-rw-r--r--src/lib/libcrypto/bn/bn_prime.pl100
-rw-r--r--src/lib/libcrypto/bn/bn_primitives.c65
-rw-r--r--src/lib/libcrypto/bn/bn_print.c191
-rw-r--r--src/lib/libcrypto/bn/bn_rand.c340
-rw-r--r--src/lib/libcrypto/bn/bn_recp.c222
-rw-r--r--src/lib/libcrypto/bn/bn_shift.c175
-rw-r--r--src/lib/libcrypto/bn/bn_small_primes.c265
-rw-r--r--src/lib/libcrypto/bn/bn_sqr.c305
-rw-r--r--src/lib/libcrypto/bn/bn_word.c245
-rw-r--r--src/lib/libcrypto/bn/s2n_bignum.h856
-rw-r--r--src/lib/libcrypto/bn/s2n_bignum_internal.h36
70 files changed, 0 insertions, 27798 deletions
diff --git a/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h b/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h
deleted file mode 100644
index fe6f8a3aea..0000000000
--- a/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h
+++ /dev/null
@@ -1,369 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.13 2023/07/24 10:21:29 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#ifndef HEADER_BN_ARCH_H
21#define HEADER_BN_ARCH_H
22
23#ifndef OPENSSL_NO_ASM
24
25#if defined(__GNUC__)
26
27#define HAVE_BN_CLZW
28
/*
 * bn_clzw() returns the number of leading zero bits in w, as computed by the
 * AArch64 clz instruction.
 */
29static inline int
30bn_clzw(BN_ULONG w)
31{
32 BN_ULONG n;
33
34 __asm__ ("clz %[n], %[w]"
35 : [n]"=r"(n)
36 : [w]"r"(w));
37
 /* The count is produced in a BN_ULONG and converted to int on return. */
38 return n;
39}
40
41#define HAVE_BN_ADDW
42
/*
 * bn_addw() computes a + b. The low word of the sum is stored in *out_r0 and
 * the carry out of the addition (0 or 1) is stored in *out_r1.
 */
43static inline void
44bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
45{
46 BN_ULONG carry, r0;
47
 /* adds sets the C flag on unsigned overflow; cset converts it to 0 or 1. */
48 __asm__ (
49 "adds %[r0], %[a], %[b] \n"
50 "cset %[carry], cs \n"
51 : [carry]"=r"(carry), [r0]"=r"(r0)
52 : [a]"r"(a), [b]"r"(b)
53 : "cc");
54
55 *out_r1 = carry;
56 *out_r0 = r0;
57}
58
59#define HAVE_BN_ADDW_ADDW
60
/*
 * bn_addw_addw() computes a + b + c. The low word of the sum is stored in
 * *out_r0 and the number of carries generated by the two additions (0, 1 or 2)
 * is stored in *out_r1.
 */
61static inline void
62bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
63 BN_ULONG *out_r0)
64{
65 BN_ULONG carry, r0;
66
 /*
  * Both outputs are written before c is read, hence the early-clobber
  * ("=&r") constraints that keep them out of the input registers.
  */
67 __asm__ (
68 "adds %[r0], %[a], %[b] \n"
69 "cset %[carry], cs \n"
70 "adds %[r0], %[r0], %[c] \n"
71 "cinc %[carry], %[carry], cs \n"
72 : [carry]"=&r"(carry), [r0]"=&r"(r0)
73 : [a]"r"(a), [b]"r"(b), [c]"r"(c)
74 : "cc");
75
76 *out_r1 = carry;
77 *out_r0 = r0;
78}
79
80#define HAVE_BN_QWADDQW
81
/*
 * bn_qwaddqw() adds the four word values (a3:a2:a1:a0) and (b3:b2:b1:b0)
 * together with an incoming carry. The four result words are stored in
 * *out_r3 .. *out_r0 (r0 being the least significant) and the carry out of
 * the top word (0 or 1) is stored in *out_carry.
 *
 * The incoming carry is treated as a flag: any non-zero value sets the C flag
 * before the add-with-carry chain. Presumably callers only pass 0 or 1.
 */
82static inline void
83bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
84 BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
85 BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
86{
87 BN_ULONG r3, r2, r1, r0;
88
 /*
  * Adding -1 (all ones) to carry and discarding the result in xzr leaves
  * the C flag set exactly when carry is non-zero. The adcs chain then
  * propagates the flag from word 0 up to word 3, and cset turns the final
  * flag back into a 0/1 value in the same operand.
  */
89 __asm__ (
90 "adds xzr, %[carry], #-1 \n"
91 "adcs %[r0], %[a0], %[b0] \n"
92 "adcs %[r1], %[a1], %[b1] \n"
93 "adcs %[r2], %[a2], %[b2] \n"
94 "adcs %[r3], %[a3], %[b3] \n"
95 "cset %[carry], cs \n"
96 : [carry]"+r"(carry), [r3]"=&r"(r3), [r2]"=&r"(r2),
97 [r1]"=&r"(r1), [r0]"=&r"(r0)
98 : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
99 [b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
100 : "cc");
101
102 *out_carry = carry;
103 *out_r3 = r3;
104 *out_r2 = r2;
105 *out_r1 = r1;
106 *out_r0 = r0;
107}
108
109#define HAVE_BN_MULW
110
/*
 * bn_mulw() computes the double word product a * b. The high word is stored
 * in *out_r1 and the low word in *out_r0.
 */
111static inline void
112bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
113{
114 BN_ULONG r1, r0;
115
116 /* Unsigned multiplication using a umulh/mul pair. */
 /* r1 is early-clobber since a and b are read again by the mul. */
117 __asm__ (
118 "umulh %[r1], %[a], %[b] \n"
119 "mul %[r0], %[a], %[b] \n"
120 : [r1]"=&r"(r1), [r0]"=r"(r0)
121 : [a]"r"(a), [b]"r"(b));
122
123 *out_r1 = r1;
124 *out_r0 = r0;
125}
126
127#define HAVE_BN_MULW_ADDW
128
/*
 * bn_mulw_addw() computes a * b + c as a double word value. The high word is
 * stored in *out_r1 and the low word in *out_r0.
 */
129static inline void
130bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
131 BN_ULONG *out_r0)
132{
133 BN_ULONG r1, r0;
134
 /*
  * The product is formed in r1:r0, c is added to the low word and the
  * resulting carry is folded into the high word by adding zero with carry.
  */
135 __asm__ (
136 "umulh %[r1], %[a], %[b] \n"
137 "mul %[r0], %[a], %[b] \n"
138 "adds %[r0], %[r0], %[c] \n"
139 "adc %[r1], %[r1], xzr \n"
140 : [r1]"=&r"(r1), [r0]"=&r"(r0)
141 : [a]"r"(a), [b]"r"(b), [c]"r"(c)
142 : "cc");
143
144 *out_r1 = r1;
145 *out_r0 = r0;
146}
147
148#define HAVE_BN_MULW_ADDW_ADDW
149
/*
 * bn_mulw_addw_addw() computes a * b + c + d as a double word value. The high
 * word is stored in *out_r1 and the low word in *out_r0.
 */
150static inline void
151bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
152 BN_ULONG *out_r1, BN_ULONG *out_r0)
153{
154 BN_ULONG r1, r0;
155
 /*
  * c and d are added to the low word one after the other; after each
  * addition the carry is folded into the high word.
  */
156 __asm__ (
157 "umulh %[r1], %[a], %[b] \n"
158 "mul %[r0], %[a], %[b] \n"
159 "adds %[r0], %[r0], %[c] \n"
160 "adc %[r1], %[r1], xzr \n"
161 "adds %[r0], %[r0], %[d] \n"
162 "adc %[r1], %[r1], xzr \n"
163 : [r1]"=&r"(r1), [r0]"=&r"(r0)
164 : [a]"r"(a), [b]"r"(b), [c]"r"(c), [d]"r"(d)
165 : "cc");
166
167 *out_r1 = r1;
168 *out_r0 = r0;
169}
170
171#define HAVE_BN_MULW_ADDTW
172
/*
 * bn_mulw_addtw() computes a * b + (c2:c1:c0) as a triple word value, stored
 * in *out_r2 (most significant), *out_r1 and *out_r0 (least significant).
 * Any carry out of the top word is discarded, since the final adc does not
 * record it.
 */
173static inline void
174bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
175 BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
176{
177 BN_ULONG r2, r1, r0;
178
 /*
  * The double word product in r1:r0 is added to c1:c0 with the carry
  * rippling upwards; r2 is c2 plus the carry out of the middle word.
  */
179 __asm__ (
180 "umulh %[r1], %[a], %[b] \n"
181 "mul %[r0], %[a], %[b] \n"
182 "adds %[r0], %[r0], %[c0] \n"
183 "adcs %[r1], %[r1], %[c1] \n"
184 "adc %[r2], xzr, %[c2] \n"
185 : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0)
186 : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
187 : "cc");
188
189 *out_r2 = r2;
190 *out_r1 = r1;
191 *out_r0 = r0;
192}
193
194#define HAVE_BN_MUL2_MULW_ADDTW
195
/*
 * bn_mul2_mulw_addtw() computes 2 * a * b + (c2:c1:c0) as a triple word
 * value, stored in *out_r2 (most significant), *out_r1 and *out_r0 (least
 * significant). Any carry out of the top word is discarded.
 */
196static inline void
197bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
198 BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
199{
200 BN_ULONG r2, r1, r0, x1, x0;
201
 /*
  * The product is computed once into the scratch pair x1:x0 and then added
  * to the accumulator twice, rather than being doubled first. Each pass
  * folds the carry out of the middle word into r2.
  */
202 __asm__ (
203 "umulh %[x1], %[a], %[b] \n"
204 "mul %[x0], %[a], %[b] \n"
205 "adds %[r0], %[c0], %[x0] \n"
206 "adcs %[r1], %[c1], %[x1] \n"
207 "adc %[r2], xzr, %[c2] \n"
208 "adds %[r0], %[r0], %[x0] \n"
209 "adcs %[r1], %[r1], %[x1] \n"
210 "adc %[r2], xzr, %[r2] \n"
211 : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0), [x1]"=&r"(x1),
212 [x0]"=&r"(x0)
213 : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
214 : "cc");
215
216 *out_r2 = r2;
217 *out_r1 = r1;
218 *out_r0 = r0;
219}
220
221#define HAVE_BN_QWMULW_ADDW
222
/*
 * bn_qwmulw_addw() multiplies the four word value (a3:a2:a1:a0) by the single
 * word b and adds the single word c. The five word result is stored in
 * *out_r4 (most significant) .. *out_r0 (least significant).
 */
223static inline void
224bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
225 BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
226 BN_ULONG *out_r1, BN_ULONG *out_r0)
227{
228 BN_ULONG r4, r3, r2, r1, r0;
229
 /*
  * Once c has been added into r0 its register is reused as scratch for the
  * low half of each subsequent partial product, which is why c is an
  * input/output ("+&r") operand. The carry chain started by adds runs
  * across the interleaved umulh/mul instructions, which do not modify the
  * condition flags.
  */
230 __asm__ (
231 "umulh %[r1], %[a0], %[b] \n"
232 "mul %[r0], %[a0], %[b] \n"
233 "adds %[r0], %[r0], %[c] \n"
234 "umulh %[r2], %[a1], %[b] \n"
235 "mul %[c], %[a1], %[b] \n"
236 "adcs %[r1], %[r1], %[c] \n"
237 "umulh %[r3], %[a2], %[b] \n"
238 "mul %[c], %[a2], %[b] \n"
239 "adcs %[r2], %[r2], %[c] \n"
240 "umulh %[r4], %[a3], %[b] \n"
241 "mul %[c], %[a3], %[b] \n"
242 "adcs %[r3], %[r3], %[c] \n"
243 "adc %[r4], %[r4], xzr \n"
244 : [c]"+&r"(c), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2),
245 [r1]"=&r"(r1), [r0]"=&r"(r0)
246 : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b)
247 : "cc");
248
249 *out_r4 = r4;
250 *out_r3 = r3;
251 *out_r2 = r2;
252 *out_r1 = r1;
253 *out_r0 = r0;
254}
255
256#define HAVE_BN_QWMULW_ADDQW_ADDW
257
/*
 * bn_qwmulw_addqw_addw() multiplies the four word value (a3:a2:a1:a0) by the
 * single word b, then adds the four word value (c3:c2:c1:c0) and the single
 * word d. The five word result is stored in *out_r4 (most significant) ..
 * *out_r0 (least significant).
 */
258static inline void
259bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
260 BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
261 BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
262 BN_ULONG *out_r0)
263{
264 BN_ULONG r4, r3, r2, r1, r0;
265
 /*
  * First pass: a * b + d, with the register holding d reused as scratch
  * for the low half of each partial product once d has been consumed
  * (hence the "+&r" constraint). Second pass: add c3:c2:c1:c0 with a fresh
  * carry chain, folding the final carry into r4.
  */
266 __asm__ (
267 "umulh %[r1], %[a0], %[b] \n"
268 "mul %[r0], %[a0], %[b] \n"
269 "adds %[r0], %[r0], %[d] \n"
270 "umulh %[r2], %[a1], %[b] \n"
271 "mul %[d], %[a1], %[b] \n"
272 "adcs %[r1], %[r1], %[d] \n"
273 "umulh %[r3], %[a2], %[b] \n"
274 "mul %[d], %[a2], %[b] \n"
275 "adcs %[r2], %[r2], %[d] \n"
276 "umulh %[r4], %[a3], %[b] \n"
277 "mul %[d], %[a3], %[b] \n"
278 "adcs %[r3], %[r3], %[d] \n"
279 "adc %[r4], %[r4], xzr \n"
280 "adds %[r0], %[r0], %[c0] \n"
281 "adcs %[r1], %[r1], %[c1] \n"
282 "adcs %[r2], %[r2], %[c2] \n"
283 "adcs %[r3], %[r3], %[c3] \n"
284 "adc %[r4], %[r4], xzr \n"
285 : [d]"+&r"(d), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2),
286 [r1]"=&r"(r1), [r0]"=&r"(r0)
287 : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b),
288 [c3]"r"(c3), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
289 : "cc");
290
291 *out_r4 = r4;
292 *out_r3 = r3;
293 *out_r2 = r2;
294 *out_r1 = r1;
295 *out_r0 = r0;
296}
297
298#define HAVE_BN_SUBW
299
/*
 * bn_subw() computes a - b. The difference is stored in *out_r0 and the
 * borrow (1 if the subtraction wrapped, otherwise 0) in *out_borrow.
 */
300static inline void
301bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
302{
303 BN_ULONG borrow, r0;
304
 /* On AArch64 subs clears the C flag when a borrow occurs, hence "cc". */
305 __asm__ (
306 "subs %[r0], %[a], %[b] \n"
307 "cset %[borrow], cc \n"
308 : [borrow]"=r"(borrow), [r0]"=r"(r0)
309 : [a]"r"(a), [b]"r"(b)
310 : "cc");
311
312 *out_borrow = borrow;
313 *out_r0 = r0;
314}
315
316#define HAVE_BN_SUBW_SUBW
317
/*
 * bn_subw_subw() computes a - b - c. The difference is stored in *out_r0 and
 * the number of borrows generated by the two subtractions (0, 1 or 2) is
 * stored in *out_borrow.
 */
318static inline void
319bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
320 BN_ULONG *out_r0)
321{
322 BN_ULONG borrow, r0;
323
 /*
  * Both outputs are written before c is read, hence the early-clobber
  * ("=&r") constraints that keep them out of the input registers.
  */
324 __asm__ (
325 "subs %[r0], %[a], %[b] \n"
326 "cset %[borrow], cc \n"
327 "subs %[r0], %[r0], %[c] \n"
328 "cinc %[borrow], %[borrow], cc \n"
329 : [borrow]"=&r"(borrow), [r0]"=&r"(r0)
330 : [a]"r"(a), [b]"r"(b), [c]"r"(c)
331 : "cc");
332
333 *out_borrow = borrow;
334 *out_r0 = r0;
335}
336
337#define HAVE_BN_QWSUBQW
338
/*
 * bn_qwsubqw() subtracts the four word value (b3:b2:b1:b0) and an incoming
 * borrow from the four word value (a3:a2:a1:a0). The four result words are
 * stored in *out_r3 .. *out_r0 (r0 being the least significant) and the
 * borrow out of the top word (0 or 1) is stored in *out_borrow.
 *
 * The incoming borrow is treated as a flag: any non-zero value results in a
 * borrow into the lowest word. Presumably callers only pass 0 or 1.
 */
339static inline void
340bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
341 BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
342 BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
343{
344 BN_ULONG r3, r2, r1, r0;
345
 /*
  * Computing 0 - borrow and discarding the result leaves the C flag clear
  * exactly when borrow is non-zero, which is the "borrow pending" state
  * expected by sbcs. The sbcs chain then runs from word 0 up to word 3 and
  * cset converts the final flag (carry clear) back into a 0/1 borrow.
  */
346 __asm__ (
347 "subs xzr, xzr, %[borrow] \n"
348 "sbcs %[r0], %[a0], %[b0] \n"
349 "sbcs %[r1], %[a1], %[b1] \n"
350 "sbcs %[r2], %[a2], %[b2] \n"
351 "sbcs %[r3], %[a3], %[b3] \n"
352 "cset %[borrow], cc \n"
353 : [borrow]"+r"(borrow), [r3]"=&r"(r3), [r2]"=&r"(r2),
354 [r1]"=&r"(r1), [r0]"=&r"(r0)
355 : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
356 [b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
357 : "cc");
358
359 *out_borrow = borrow;
360 *out_r3 = r3;
361 *out_r2 = r2;
362 *out_r1 = r1;
363 *out_r0 = r0;
364}
365
366#endif /* __GNUC__ */
367
368#endif
369#endif
diff --git a/src/lib/libcrypto/bn/arch/alpha/bn_arch.h b/src/lib/libcrypto/bn/arch/alpha/bn_arch.h
deleted file mode 100644
index 5bf4ba8722..0000000000
--- a/src/lib/libcrypto/bn/arch/alpha/bn_arch.h
+++ /dev/null
@@ -1,44 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.4 2023/02/16 10:41:03 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifndef HEADER_BN_ARCH_H
19#define HEADER_BN_ARCH_H
20
21#ifndef OPENSSL_NO_ASM
22
23#if 0 /* Needs testing and enabling. */
24#if defined(__GNUC__)
25#define HAVE_BN_MULW
26
/*
 * bn_mulw() computes the double word product a * b, storing the high word in
 * *out_r1 and the low word in *out_r0. This definition sits inside an
 * "#if 0" block and is therefore not compiled.
 */
27static inline void
28bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
29{
30 BN_ULONG r1, r0;
31
32 /* Unsigned multiplication using a umulh/mulq pair. */
 /* Positional operands: %0 = r1, %1 = r0, %2 = a, %3 = b. */
33 __asm__ ("umulh %2, %3, %0; mulq %2, %3, %1"
34 : "=&r"(r1), "=r"(r0)
35 : "r"(a), "r"(b));
36
37 *out_r1 = r1;
38 *out_r0 = r0;
39}
40#endif /* __GNUC__ */
41#endif
42
43#endif
44#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
deleted file mode 100644
index 5fe4aae7a1..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
+++ /dev/null
@@ -1,165 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Add, z := x + y
17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18//
19// extern uint64_t bignum_add
20// (uint64_t p, uint64_t *z,
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22//
23// Does the z := x + y operation, truncating modulo p words in general and
24// returning a top carry (0 or 1) in the p'th place, only adding the input
25// words below p (as well as m and n respectively) to get the sum and carry.
26//
27// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
28// Microsoft x64 ABI: RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
29// ----------------------------------------------------------------------------
30
31#include "s2n_bignum_internal.h"
32
33 .intel_syntax noprefix
34 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add)
35 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add)
36 .text
37
38#define p rdi
39#define z rsi
40#define m rdx
41#define x rcx
42#define n r8
43#define y r9
44#define i r10
45#define a rax
46
47#define ashort eax
48
49
50
// Entry point for bignum_add. With the standard ABI the arguments already
// sit in the registers named by the p, z, m, x, n and y defines.
51S2N_BN_SYMBOL(bignum_add):
52 _CET_ENDBR
53
54#if WINDOWS_ABI
// Save rdi and rsi (restored before each ret) and move the Microsoft x64
// arguments into the standard ABI registers. The two pushes moved rsp by 16,
// so the stack arguments documented at [RSP+40] and [RSP+48] are read at
// +56 and +64.
55 push rdi
56 push rsi
57 mov rdi, rcx
58 mov rsi, rdx
59 mov rdx, r8
60 mov rcx, r9
61 mov r8, [rsp+56]
62 mov r9, [rsp+64]
63#endif
64
65// Zero the main index counter for both branches
66
67 xor i, i
68
69// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
70// we'll never need words past the p'th. Can now assume m <= p and n <= p.
71// Then compare the modified m and n and branch accordingly
72
73 cmp p, m
74 cmovc m, p
75 cmp p, n
76 cmovc n, p
77 cmp m, n
78 jc ylonger
79
80// The case where x is longer or of the same size (p >= m >= n)
81
// p becomes the number of tail words beyond x, and m becomes (m - n) + 1 so
// that the dec/jnz at xtest runs xtoploop exactly m - n times.
// "test n, n" also clears CF, which is the carry-in for the first adc.
82 sub p, m
83 sub m, n
84 inc m
85 test n, n
86 jz xtest
87xmainloop:
// inc and dec leave CF untouched, so the carry from each adc survives into
// the next iteration.
88 mov a, [x+8*i]
89 adc a, [y+8*i]
90 mov [z+8*i],a
91 inc i
92 dec n
93 jnz xmainloop
94 jmp xtest
95xtoploop:
96 mov a, [x+8*i]
97 adc a, 0
98 mov [z+8*i],a
99 inc i
100xtest:
101 dec m
102 jnz xtoploop
// mov (unlike xor) does not alter CF, so this pair sets a to the final
// carry (0 or 1), which is the return value.
103 mov ashort, 0
104 adc a, 0
105 test p, p
106 jnz tails
107#if WINDOWS_ABI
108 pop rsi
109 pop rdi
110#endif
111 ret
112
113// The case where y is longer (p >= n > m)
114
115ylonger:
116
// p becomes the number of tail words beyond y and n the number of words of y
// beyond x. "test m, m" also clears CF for the first adc.
117 sub p, n
118 sub n, m
119 test m, m
120 jz ytoploop
121ymainloop:
122 mov a, [x+8*i]
123 adc a, [y+8*i]
124 mov [z+8*i],a
125 inc i
126 dec m
127 jnz ymainloop
128ytoploop:
129 mov a, [y+8*i]
130 adc a, 0
131 mov [z+8*i],a
132 inc i
133 dec n
134 jnz ytoploop
135 mov ashort, 0
136 adc a, 0
137 test p, p
138 jnz tails
139#if WINDOWS_ABI
140 pop rsi
141 pop rdi
142#endif
143 ret
144
145// Adding a non-trivial tail, when p > max(m,n)
146
// The carry is stored as the first tail word and a is then zeroed, so the
// remaining tail words are cleared and the function returns 0.
147tails:
148 mov [z+8*i],a
149 xor a, a
150 jmp tail
151tailloop:
152 mov [z+8*i],a
153tail:
154 inc i
155 dec p
156 jnz tailloop
157#if WINDOWS_ABI
158 pop rsi
159 pop rdi
160#endif
161 ret
162
163#if defined(__linux__) && defined(__ELF__)
164.section .note.GNU-stack,"",%progbits
165#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
deleted file mode 100644
index 25ba17bce2..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
+++ /dev/null
@@ -1,155 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Multiply-add with single-word multiplier, z := z + c * y
17// Inputs c, y[n]; outputs function return (carry-out) and z[k]
18//
19// extern uint64_t bignum_cmadd
20// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
21//
22// Does the "z := z + c * y" operation where y is n digits and the result z
23// is p digits (p being the size passed as k). Truncates the result in general.
24//
25// The return value is a high/carry word that is meaningful when p = n + 1, or
26// more generally when n <= p and the result fits in p + 1 digits. In these
27// cases it gives the top digit of the (p + 1)-digit result.
28//
29// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
30// Microsoft x64 ABI: RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
31// ----------------------------------------------------------------------------
32
33#include "s2n_bignum_internal.h"
34
35 .intel_syntax noprefix
36 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmadd)
37 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmadd)
38 .text
39
40#define p rdi
41#define z rsi
42#define c r9
43#define n rcx
44#define x r8
45
46#define i r10
47#define h r11
48
49#define r rbx
50
51#define hshort r11d
52#define ishort r10d
53
54
55
// Entry point for bignum_cmadd. With the standard ABI the arguments arrive in
// rdi (p), rsi (z), rdx (the multiplier, copied to c below), rcx (n) and
// r8 (x).
56S2N_BN_SYMBOL(bignum_cmadd):
57 _CET_ENDBR
58
59#if WINDOWS_ABI
// Save rdi and rsi and move the Microsoft x64 arguments into the standard
// ABI registers. The two pushes moved rsp by 16, so the stack argument
// documented at [RSP+40] is read at +56.
60 push rdi
61 push rsi
62 mov rdi, rcx
63 mov rsi, rdx
64 mov rdx, r8
65 mov rcx, r9
66 mov r8, [rsp+56]
67#endif
68
69// Seems hard to avoid one more register
70
71 push rbx
72
73// First clamp the input size n := min(p,n) since we can never need to read
74// past the p'th term of the input to generate p-digit output.
75// Subtract p := p - min(n,p) so it holds the size of the extra tail needed
76
77 cmp p, n
78 cmovc n, p
79 sub p, n
80
81// Initialize high part h = 0; if n = 0 do nothing but return that zero
82
83 xor h, h
84 test n, n
85 jz end
86
87// Move c into a safer register as multiplies overwrite rdx
88
89 mov c, rdx
90
91// Initialization of the loop: 2^64 * CF + [h,z_0'] = z_0 + c * x_0
92
// The mov that sets i to 1 and the dec of n both leave CF from the add
// intact; writing the 32-bit ishort also zeroes the upper half of i.
93 mov rax, [x]
94 mul c
95 add [z], rax
96 mov h, rdx
97 mov ishort, 1
98 dec n
99 jz hightail
100
101// Main loop, where we always have CF + previous high part h to add in
102
// The adc folds z_i and the pending carry into h; "sbb r, r" captures the
// carry out of that as 0 or all-ones, because the following mul overwrites
// CF. Subtracting r from rdx therefore adds the captured carry to the new
// high word. The final add leaves CF for the next iteration (inc and dec do
// not touch CF).
103loop:
104 adc h, [z+8*i]
105 sbb r, r
106 mov rax, [x+8*i]
107 mul c
108 sub rdx, r
109 add rax, h
110 mov [z+8*i], rax
111 mov h, rdx
112 inc i
113 dec n
114 jnz loop
115
116hightail:
117 adc h, 0
118
119// Propagate the carry all the way to the end with h as extra carry word
120
121tail:
122 test p, p
123 jz end
124
// h is added into the first tail word and then zeroed with mov, which keeps
// CF from the add; tloop just ripples that carry through the remaining
// words.
125 add [z+8*i], h
126 mov hshort, 0
127 inc i
128 dec p
129 jz highend
130
131tloop:
132 adc [z+8*i], h
133 inc i
134 dec p
135 jnz tloop
136
137highend:
138
139 adc h, 0
140
141// Return the high/carry word
142
143end:
144 mov rax, h
145
146 pop rbx
147#if WINDOWS_ABI
148 pop rsi
149 pop rdi
150#endif
151 ret
152
153#if defined(__linux__) && defined(__ELF__)
154.section .note.GNU-stack,"",%progbits
155#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
deleted file mode 100644
index 12f785d63a..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
+++ /dev/null
@@ -1,138 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Multiply by a single word, z := c * y
17// Inputs c, y[n]; outputs function return (carry-out) and z[k]
18//
19// extern uint64_t bignum_cmul
20// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
21//
22// Does the "z := c * y" operation where y is n digits and the result z is
23// p digits (p being the size passed as k). Truncates unless p >= n + 1.
24//
25// The return value is a high/carry word that is meaningful when p >= n as
26// giving the high part of the result. Since this is always zero if p > n,
27// it is mainly of interest in the special case p = n, i.e. where the source
28// and destination have the same nominal size, when it gives the extra word
29// of the full result.
30//
31// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
32// Microsoft x64 ABI: RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
33// ----------------------------------------------------------------------------
34
35#include "s2n_bignum_internal.h"
36
37 .intel_syntax noprefix
38 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul)
39 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul)
40 .text
41
42#define p rdi
43#define z rsi
44#define c r9
45#define n rcx
46#define x r8
47
48#define i r10
49#define h r11
50
51
52
// Entry point for bignum_cmul. With the standard ABI the arguments arrive in
// rdi (p), rsi (z), rdx (the multiplier, copied to c below), rcx (n) and
// r8 (x).
53S2N_BN_SYMBOL(bignum_cmul):
54 _CET_ENDBR
55
56#if WINDOWS_ABI
// Save rdi and rsi and move the Microsoft x64 arguments into the standard
// ABI registers. The two pushes moved rsp by 16, so the stack argument
// documented at [RSP+40] is read at +56.
57 push rdi
58 push rsi
59 mov rdi, rcx
60 mov rsi, rdx
61 mov rdx, r8
62 mov rcx, r9
63 mov r8, [rsp+56]
64#endif
65
66// First clamp the input size n := min(p,n) since we can never need to read
67// past the p'th term of the input to generate p-digit output. Now we can
68// assume that n <= p
69
70 cmp p, n
71 cmovc n, p
72
73// Initialize current input/output pointer offset i and high part h.
74// But then if n = 0 skip the multiplication and go to the tail part
75
76 xor h, h
77 xor i, i
78 test n, n
79 jz tail
80
81// Move c into a safer register as multiplies overwrite rdx
82
83 mov c, rdx
84
85// Initialization of the loop: [h,l] = c * x_0
86
87 mov rax, [x]
88 mul c
89 mov [z], rax
90 mov h, rdx
91 inc i
92 cmp i, n
93 jz tail
94
95// Main loop doing the multiplications
96
// Each step forms c * x_i in rdx:rax, adds the previous high part h to the
// low half (carrying into rdx), stores the low half and keeps rdx as the
// next h.
97loop:
98 mov rax, [x+8*i]
99 mul c
100 add rax, h
101 adc rdx, 0
102 mov [z+8*i], rax
103 mov h, rdx
104 inc i
105 cmp i, n
106 jc loop
107
108// Add a tail when the destination is longer
109
// The first tail word receives the high part h; h is then cleared so the
// remaining tail words are zeroed.
110tail:
111 cmp i, p
112 jnc end
113 mov [z+8*i], h
114 xor h, h
115 inc i
116 cmp i, p
117 jnc end
118
119tloop:
120 mov [z+8*i], h
121 inc i
122 cmp i, p
123 jc tloop
124
125// Return the high/carry word
126
// h is still the last high part if no tail word was written, and zero
// otherwise (the tail clears it after storing it).
127end:
128 mov rax, h
129
130#if WINDOWS_ABI
131 pop rsi
132 pop rdi
133#endif
134 ret
135
136#if defined(__linux__) && defined(__ELF__)
137.section .note.GNU-stack,"",%progbits
138#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
deleted file mode 100644
index a3552679a2..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
+++ /dev/null
@@ -1,167 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Multiply z := x * y
17// Inputs x[m], y[n]; output z[k]
18//
19// extern void bignum_mul
20// (uint64_t k, uint64_t *z,
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22//
23// Does the "z := x * y" operation where x is m digits, y is n, result z is k.
24// Truncates the result in general unless k >= m + n
25//
26// Standard x86-64 ABI: RDI = k, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y
27// Microsoft x64 ABI: RCX = k, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y
28// ----------------------------------------------------------------------------
29
30#include "s2n_bignum_internal.h"
31
32 .intel_syntax noprefix
33 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul)
34 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul)
35 .text
36
37// These already match the standard ABI argument registers on entry
38
39#define p rdi
40#define z rsi
41#define n r8
42
43// These do not: they are working registers that the code below sets up itself
44
45#define c r15
46#define h r14
47#define l r13
48#define x r12
49#define y r11
50#define i rbx
51#define k r10
52#define m rbp
53
54// These are always local scratch since multiplier result is in these
55
56#define a rax
57#define d rdx
58
59
60
// Entry point for bignum_mul. With the standard ABI p, z and n are used
// directly from rdi, rsi and r8; the x and y base pointers stay in rcx and r9
// and m is copied from rdx into rbp below.
61S2N_BN_SYMBOL(bignum_mul):
62 _CET_ENDBR
63
64#if WINDOWS_ABI
// Save rdi and rsi and move the Microsoft x64 arguments into the standard
// ABI registers. The two pushes moved rsp by 16, so the stack arguments
// documented at [RSP+40] and [RSP+48] are read at +56 and +64.
65 push rdi
66 push rsi
67 mov rdi, rcx
68 mov rsi, rdx
69 mov rdx, r8
70 mov rcx, r9
71 mov r8, [rsp+56]
72 mov r9, [rsp+64]
73#endif
74
75// We use too many registers, and also we need rax:rdx for multiplications
76
// m is moved out of rdx because mul overwrites rdx; the six registers pushed
// here are popped in reverse order at "end".
77 push rbx
78 push rbp
79 push r12
80 push r13
81 push r14
82 push r15
83 mov m, rdx
84
85// If the result size is zero, do nothing
86// Note that even if either or both inputs has size zero, we can't
87// just give up because we at least need to zero the output array
88// If we did a multiply-add variant, however, then we could
89
90 test p, p
91 jz end
92
93// Set initial 2-part sum to zero (we zero c inside the body)
94
95 xor h,h
96 xor l,l
97
98// Otherwise do outer loop k = 0 ... k = p - 1
99
100 xor k, k
101
102outerloop:
103
104// Zero our carry term first; we eventually want it and a zero is useful now
105// Set a = max 0 (k + 1 - n), i = min (k + 1) m
106// This defines the range a <= j < i for the inner summation
107// Note that since k < p < 2^64 we can assume k + 1 doesn't overflow
108// And since we want to increment it anyway, we might as well do it now
109
110 xor c, c // c = 0
111 inc k // k = k + 1
112
113 mov a, k // a = k + 1
114 sub a, n // a = k + 1 - n
115 cmovc a, c // a = max 0 (k + 1 - n)
116
117 mov i, m // i = m
118 cmp k, m // CF <=> k + 1 < m
119 cmovc i, k // i = min (k + 1) m
120
121// Turn i into a loop count, and skip things if it's <= 0
122// Otherwise set up initial pointers x -> x0[a] and y -> y0[k - a]
123// and then launch into the main inner loop, postdecrementing i
124
125 mov d, k
126 sub d, i
127 sub i, a
128 jbe innerend
129 lea x,[rcx+8*a]
130 lea y,[r9+8*d-8]
131
// x advances by one word per iteration while y is indexed by the
// down-counting i, so the two operands are walked in opposite directions.
// Each product is accumulated into the three-word sum (c,h,l).
132innerloop:
133 mov rax, [y+8*i]
134 mul QWORD PTR [x]
135 add x, 8
136 add l, rax
137 adc h, rdx
138 adc c, 0
139 dec i
140 jnz innerloop
141
142innerend:
143
// Emit the lowest word of the sum as the next output digit and shift the
// remaining two words down for the next column.
144 mov [z], l
145 mov l, h
146 mov h, c
147 add z, 8
148
149 cmp k, p
150 jc outerloop
151
152end:
153 pop r15
154 pop r14
155 pop r13
156 pop r12
157 pop rbp
158 pop rbx
159#if WINDOWS_ABI
160 pop rsi
161 pop rdi
162#endif
163 ret
164
165#if defined(__linux__) && defined(__ELF__)
166.section .note.GNU-stack,"",%progbits
167#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
deleted file mode 100644
index 70ff69e372..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
+++ /dev/null
@@ -1,157 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Multiply z := x * y
17// Inputs x[4], y[4]; output z[8]
18//
19// extern void bignum_mul_4_8_alt
20// (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]);
21//
22// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
23// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
24// ----------------------------------------------------------------------------
25
26#include "s2n_bignum_internal.h"
27
28 .intel_syntax noprefix
29 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_4_8_alt)
30 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_4_8_alt)
31 .text
32
33// z and x stay in their standard-ABI argument registers
34
35#define z rdi
36#define x rsi
37
38// This is moved from rdx to free it for muls
39
40#define y rcx
41
42// Other variables used as a rotating 3-word window to add terms to
43
44#define t0 r8
45#define t1 r9
46#define t2 r10
47
48// Macro for the key "multiply and add to (c,h,l)" step
49
50#define combadd(c,h,l,numa,numb) \
51 mov rax, numa; \
52 mul QWORD PTR numb; \
53 add l, rax; \
54 adc h, rdx; \
55 adc c, 0
56
57// A minutely shorter form for when c = 0 initially
58
59#define combadz(c,h,l,numa,numb) \
60 mov rax, numa; \
61 mul QWORD PTR numb; \
62 add l, rax; \
63 adc h, rdx; \
64 adc c, c
65
66// A short form where we don't expect a top carry
67
68#define combads(h,l,numa,numb) \
69 mov rax, numa; \
70 mul QWORD PTR numb; \
71 add l, rax; \
72 adc h, rdx
73
74S2N_BN_SYMBOL(bignum_mul_4_8_alt):
75 _CET_ENDBR
76
77#if WINDOWS_ABI
78 push rdi
79 push rsi
80 mov rdi, rcx
81 mov rsi, rdx
82 mov rdx, r8
83#endif
84
85// Copy y into a safe register to start with
86
87 mov y, rdx
88
89// Result term 0
90
91 mov rax, [x]
92 mul QWORD PTR [y]
93
94 mov [z], rax
95 mov t0, rdx
96 xor t1, t1
97
98// Result term 1
99
100 xor t2, t2
101 combads(t1,t0,[x],[y+8])
102 combadz(t2,t1,t0,[x+8],[y])
103 mov [z+8], t0
104
105// Result term 2
106
107 xor t0, t0
108 combadz(t0,t2,t1,[x],[y+16])
109 combadd(t0,t2,t1,[x+8],[y+8])
110 combadd(t0,t2,t1,[x+16],[y])
111 mov [z+16], t1
112
113// Result term 3
114
115 xor t1, t1
116 combadz(t1,t0,t2,[x],[y+24])
117 combadd(t1,t0,t2,[x+8],[y+16])
118 combadd(t1,t0,t2,[x+16],[y+8])
119 combadd(t1,t0,t2,[x+24],[y])
120 mov [z+24], t2
121
122// Result term 4
123
124 xor t2, t2
125 combadz(t2,t1,t0,[x+8],[y+24])
126 combadd(t2,t1,t0,[x+16],[y+16])
127 combadd(t2,t1,t0,[x+24],[y+8])
128 mov [z+32], t0
129
130// Result term 5
131
132 xor t0, t0
133 combadz(t0,t2,t1,[x+16],[y+24])
134 combadd(t0,t2,t1,[x+24],[y+16])
135 mov [z+40], t1
136
137// Result term 6
138
139 xor t1, t1
140 combads(t0,t2,[x+24],[y+24])
141 mov [z+48], t2
142
143// Result term 7
144
145 mov [z+56], t0
146
147// Return
148
149#if WINDOWS_ABI
150 pop rsi
151 pop rdi
152#endif
153 ret
154
155#if defined(__linux__) && defined(__ELF__)
156.section .note.GNU-stack,"",%progbits
157#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
deleted file mode 100644
index 066403b074..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
+++ /dev/null
@@ -1,244 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Multiply z := x * y
17// Inputs x[8], y[8]; output z[16]
18//
19// extern void bignum_mul_8_16_alt
20// (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]);
21//
22// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
23// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
24// ----------------------------------------------------------------------------
25
26#include "s2n_bignum_internal.h"
27
28 .intel_syntax noprefix
29 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_8_16_alt)
30 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_8_16_alt)
31 .text
32
33// z and x stay in their standard-ABI argument registers
34
35#define z rdi
36#define x rsi
37
38// This is moved from rdx to free it for muls
39
40#define y rcx
41
42// Other variables used as a rotating 3-word window to add terms to
43
44#define t0 r8
45#define t1 r9
46#define t2 r10
47
48// Macro for the key "multiply and add to (c,h,l)" step
49
50#define combadd(c,h,l,numa,numb) \
51 mov rax, numa; \
52 mul QWORD PTR numb; \
53 add l, rax; \
54 adc h, rdx; \
55 adc c, 0
56
57// A minutely shorter form for when c = 0 initially
58
59#define combadz(c,h,l,numa,numb) \
60 mov rax, numa; \
61 mul QWORD PTR numb; \
62 add l, rax; \
63 adc h, rdx; \
64 adc c, c
65
66// A short form where we don't expect a top carry
67
68#define combads(h,l,numa,numb) \
69 mov rax, numa; \
70 mul QWORD PTR numb; \
71 add l, rax; \
72 adc h, rdx
73
74S2N_BN_SYMBOL(bignum_mul_8_16_alt):
75 _CET_ENDBR
76
77#if WINDOWS_ABI
78 push rdi
79 push rsi
80 mov rdi, rcx
81 mov rsi, rdx
82 mov rdx, r8
83#endif
84
85// Copy y into a safe register to start with
86
87 mov y, rdx
88
89// Result term 0
90
91 mov rax, [x]
92 mul QWORD PTR [y]
93
94 mov [z], rax
95 mov t0, rdx
96 xor t1, t1
97
98// Result term 1
99
100 xor t2, t2
101 combads(t1,t0,[x],[y+8])
102 combadz(t2,t1,t0,[x+8],[y])
103 mov [z+8], t0
104
105// Result term 2
106
107 xor t0, t0
108 combadz(t0,t2,t1,[x],[y+16])
109 combadd(t0,t2,t1,[x+8],[y+8])
110 combadd(t0,t2,t1,[x+16],[y])
111 mov [z+16], t1
112
113// Result term 3
114
115 xor t1, t1
116 combadz(t1,t0,t2,[x],[y+24])
117 combadd(t1,t0,t2,[x+8],[y+16])
118 combadd(t1,t0,t2,[x+16],[y+8])
119 combadd(t1,t0,t2,[x+24],[y])
120 mov [z+24], t2
121
122// Result term 4
123
124 xor t2, t2
125 combadz(t2,t1,t0,[x],[y+32])
126 combadd(t2,t1,t0,[x+8],[y+24])
127 combadd(t2,t1,t0,[x+16],[y+16])
128 combadd(t2,t1,t0,[x+24],[y+8])
129 combadd(t2,t1,t0,[x+32],[y])
130 mov [z+32], t0
131
132// Result term 5
133
134 xor t0, t0
135 combadz(t0,t2,t1,[x],[y+40])
136 combadd(t0,t2,t1,[x+8],[y+32])
137 combadd(t0,t2,t1,[x+16],[y+24])
138 combadd(t0,t2,t1,[x+24],[y+16])
139 combadd(t0,t2,t1,[x+32],[y+8])
140 combadd(t0,t2,t1,[x+40],[y])
141 mov [z+40], t1
142
143// Result term 6
144
145 xor t1, t1
146 combadz(t1,t0,t2,[x],[y+48])
147 combadd(t1,t0,t2,[x+8],[y+40])
148 combadd(t1,t0,t2,[x+16],[y+32])
149 combadd(t1,t0,t2,[x+24],[y+24])
150 combadd(t1,t0,t2,[x+32],[y+16])
151 combadd(t1,t0,t2,[x+40],[y+8])
152 combadd(t1,t0,t2,[x+48],[y])
153 mov [z+48], t2
154
155// Result term 7
156
157 xor t2, t2
158 combadz(t2,t1,t0,[x],[y+56])
159 combadd(t2,t1,t0,[x+8],[y+48])
160 combadd(t2,t1,t0,[x+16],[y+40])
161 combadd(t2,t1,t0,[x+24],[y+32])
162 combadd(t2,t1,t0,[x+32],[y+24])
163 combadd(t2,t1,t0,[x+40],[y+16])
164 combadd(t2,t1,t0,[x+48],[y+8])
165 combadd(t2,t1,t0,[x+56],[y])
166 mov [z+56], t0
167
168// Result term 8
169
170 xor t0, t0
171 combadz(t0,t2,t1,[x+8],[y+56])
172 combadd(t0,t2,t1,[x+16],[y+48])
173 combadd(t0,t2,t1,[x+24],[y+40])
174 combadd(t0,t2,t1,[x+32],[y+32])
175 combadd(t0,t2,t1,[x+40],[y+24])
176 combadd(t0,t2,t1,[x+48],[y+16])
177 combadd(t0,t2,t1,[x+56],[y+8])
178 mov [z+64], t1
179
180// Result term 9
181
182 xor t1, t1
183 combadz(t1,t0,t2,[x+16],[y+56])
184 combadd(t1,t0,t2,[x+24],[y+48])
185 combadd(t1,t0,t2,[x+32],[y+40])
186 combadd(t1,t0,t2,[x+40],[y+32])
187 combadd(t1,t0,t2,[x+48],[y+24])
188 combadd(t1,t0,t2,[x+56],[y+16])
189 mov [z+72], t2
190
191// Result term 10
192
193 xor t2, t2
194 combadz(t2,t1,t0,[x+24],[y+56])
195 combadd(t2,t1,t0,[x+32],[y+48])
196 combadd(t2,t1,t0,[x+40],[y+40])
197 combadd(t2,t1,t0,[x+48],[y+32])
198 combadd(t2,t1,t0,[x+56],[y+24])
199 mov [z+80], t0
200
201// Result term 11
202
203 xor t0, t0
204 combadz(t0,t2,t1,[x+32],[y+56])
205 combadd(t0,t2,t1,[x+40],[y+48])
206 combadd(t0,t2,t1,[x+48],[y+40])
207 combadd(t0,t2,t1,[x+56],[y+32])
208 mov [z+88], t1
209
210// Result term 12
211
212 xor t1, t1
213 combadz(t1,t0,t2,[x+40],[y+56])
214 combadd(t1,t0,t2,[x+48],[y+48])
215 combadd(t1,t0,t2,[x+56],[y+40])
216 mov [z+96], t2
217
218// Result term 13
219
220 xor t2, t2
221 combadz(t2,t1,t0,[x+48],[y+56])
222 combadd(t2,t1,t0,[x+56],[y+48])
223 mov [z+104], t0
224
225// Result term 14
226
227 combads(t2,t1,[x+56],[y+56])
228 mov [z+112], t1
229
230// Result term 15
231
232 mov [z+120], t2
233
234// Return
235
236#if WINDOWS_ABI
237 pop rsi
238 pop rdi
239#endif
240 ret
241
242#if defined(__linux__) && defined(__ELF__)
243.section .note.GNU-stack,"",%progbits
244#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
deleted file mode 100644
index 54e3f59442..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
+++ /dev/null
@@ -1,197 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Square z := x^2
17// Input x[n]; output z[k]
18//
19// extern void bignum_sqr
20// (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x);
21//
22// Does the "z := x^2" operation where x is n digits and result z is k.
23// Truncates the result in general unless k >= 2 * n
24//
25// Standard x86-64 ABI: RDI = k, RSI = z, RDX = n, RCX = x
26// Microsoft x64 ABI: RCX = k, RDX = z, R8 = n, R9 = x
27// ----------------------------------------------------------------------------
28
29#include "s2n_bignum_internal.h"
30
31 .intel_syntax noprefix
32 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr)
33 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr)
34 .text
35
36// p, z and x stay in their argument registers; n is moved from rdx to r8.
37
38#define p rdi
39#define z rsi
40#define x rcx
41#define n r8
42
43// These are always local scratch since multiplier result is in these
44
45#define a rax
46#define d rdx
47
48// Other variables
49
50#define i rbx
51#define ll rbp
52#define hh r9
53#define k r10
54#define y r11
55#define htop r12
56#define l r13
57#define h r14
58#define c r15
59
60// Short versions
61
62#define llshort ebp
63
64S2N_BN_SYMBOL(bignum_sqr):
65 _CET_ENDBR
66
67#if WINDOWS_ABI
68 push rdi
69 push rsi
70 mov rdi, rcx
71 mov rsi, rdx
72 mov rdx, r8
73 mov rcx, r9
74#endif
75
76// We use too many registers, and also we need rax:rdx for multiplications
77
78 push rbx
79 push rbp
80 push r12
81 push r13
82 push r14
83 push r15
84 mov n, rdx
85
86// If p = 0 the result is trivial and nothing needs doing
87
88 test p, p
89 jz end
90
91// Initialize the running global sum (hh,ll) = 0
92
93 xor llshort, llshort
94 xor hh, hh
95
96// Iterate outer loop from k = 0 ... k = p - 1 producing result digits
97
98 xor k, k
99
100outerloop:
101
102// First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n
103// We want to accumulate all x[i] * x[k - i] for bot <= i < top
104// For the optimization of squaring we avoid duplication and do
105// 2 * x[i] * x[k - i] for i < htop, where htop = MIN ((k+1)/2) n
106// Initialize i = bot; in fact just compute bot as i directly.
107
108 xor c, c
109 lea i, [k+1]
110 mov htop, i
111 shr htop, 1
112 sub i, n
113 cmovc i, c
114 cmp htop, n
115 cmovnc htop, n
116
117// Initialize the three-part local sum (c,h,l); c was already done above
118
119 xor l, l
120 xor h, h
121
122// If htop <= bot then main doubled part of the sum is empty
123
124 cmp i, htop
125 jnc nosumming
126
127// Use a moving pointer for [y] = x[k-i] for the cofactor
128
129 mov a, k
130 sub a, i
131 lea y, [x+8*a]
132
133// Do the main part of the sum x[i] * x[k - i] for 2 * i < k
134
135innerloop:
136 mov a, [x+8*i]
137 mul QWORD PTR [y]
138 add l, a
139 adc h, d
140 adc c, 0
141 sub y, 8
142 inc i
143 cmp i, htop
144 jc innerloop
145
146// Now double it
147
148 add l, l
149 adc h, h
150 adc c, c
151
152// If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term
153
154nosumming:
155 test k, 1
156 jnz innerend
157 cmp i, n
158 jnc innerend
159
160 mov a, [x+8*i]
161 mul a
162 add l, a
163 adc h, d
164 adc c, 0
165
166// Now add the local sum into the global sum, store and shift
167
168innerend:
169 add l, ll
170 mov [z+8*k], l
171 adc h, hh
172 mov ll, h
173 adc c, 0
174 mov hh, c
175
176 inc k
177 cmp k, p
178 jc outerloop
179
180// Restore registers and return
181
182end:
183 pop r15
184 pop r14
185 pop r13
186 pop r12
187 pop rbp
188 pop rbx
189#if WINDOWS_ABI
190 pop rsi
191 pop rdi
192#endif
193 ret
194
195#if defined(__linux__) && defined(__ELF__)
196.section .note.GNU-stack,"",%progbits
197#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
deleted file mode 100644
index 7c534ae907..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
+++ /dev/null
@@ -1,145 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Square, z := x^2
17// Input x[4]; output z[8]
18//
19// extern void bignum_sqr_4_8_alt
20// (uint64_t z[static 8], uint64_t x[static 4]);
21//
22// Standard x86-64 ABI: RDI = z, RSI = x
23// Microsoft x64 ABI: RCX = z, RDX = x
24// ----------------------------------------------------------------------------
25
26#include "s2n_bignum_internal.h"
27
28 .intel_syntax noprefix
29 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_4_8_alt)
30 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_4_8_alt)
31 .text
32
33// Input arguments
34
35#define z rdi
36#define x rsi
37
38// Other variables used as a rotating 3-word window to add terms to
39
40#define t0 rcx
41#define t1 r8
42#define t2 r9
43
44// Macro for the key "multiply and add to (c,h,l)" step, for square term
45
46#define combadd1(c,h,l,numa) \
47 mov rax, numa; \
48 mul rax; \
49 add l, rax; \
50 adc h, rdx; \
51 adc c, 0
52
53// A short form where we don't expect a top carry
54
55#define combads(h,l,numa) \
56 mov rax, numa; \
57 mul rax; \
58 add l, rax; \
59 adc h, rdx
60
61// A version doubling before adding, for non-square terms
62
63#define combadd2(c,h,l,numa,numb) \
64 mov rax, numa; \
65 mul QWORD PTR numb; \
66 add rax, rax; \
67 adc rdx, rdx; \
68 adc c, 0; \
69 add l, rax; \
70 adc h, rdx; \
71 adc c, 0
72
73S2N_BN_SYMBOL(bignum_sqr_4_8_alt):
74 _CET_ENDBR
75
76#if WINDOWS_ABI
77 push rdi
78 push rsi
79 mov rdi, rcx
80 mov rsi, rdx
81#endif
82
83// Result term 0
84
85 mov rax, [x]
86 mul rax
87
88 mov [z], rax
89 mov t0, rdx
90 xor t1, t1
91
92// Result term 1
93
94 xor t2, t2
95 combadd2(t2,t1,t0,[x],[x+8])
96 mov [z+8], t0
97
98// Result term 2
99
100 xor t0, t0
101 combadd1(t0,t2,t1,[x+8])
102 combadd2(t0,t2,t1,[x],[x+16])
103 mov [z+16], t1
104
105// Result term 3
106
107 xor t1, t1
108 combadd2(t1,t0,t2,[x],[x+24])
109 combadd2(t1,t0,t2,[x+8],[x+16])
110 mov [z+24], t2
111
112// Result term 4
113
114 xor t2, t2
115 combadd2(t2,t1,t0,[x+8],[x+24])
116 combadd1(t2,t1,t0,[x+16])
117 mov [z+32], t0
118
119// Result term 5
120
121 xor t0, t0
122 combadd2(t0,t2,t1,[x+16],[x+24])
123 mov [z+40], t1
124
125// Result term 6
126
127 xor t1, t1
128 combads(t0,t2,[x+24])
129 mov [z+48], t2
130
131// Result term 7
132
133 mov [z+56], t0
134
135// Return
136
137#if WINDOWS_ABI
138 pop rsi
139 pop rdi
140#endif
141 ret
142
143#if defined(__linux__) && defined(__ELF__)
144.section .note.GNU-stack,"",%progbits
145#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
deleted file mode 100644
index ac0b6f96c2..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
+++ /dev/null
@@ -1,242 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Square, z := x^2
17// Input x[8]; output z[16]
18//
19// extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]);
20//
21// Standard x86-64 ABI: RDI = z, RSI = x
22// Microsoft x64 ABI: RCX = z, RDX = x
23// ----------------------------------------------------------------------------
24
25#include "s2n_bignum_internal.h"
26
27 .intel_syntax noprefix
28 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_8_16_alt)
29 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_8_16_alt)
30 .text
31
32// Input arguments
33
34#define z rdi
35#define x rsi
36
37// Other variables used as a rotating 3-word window to add terms to
38
39#define t0 r8
40#define t1 r9
41#define t2 r10
42
43// Additional temporaries for local windows to share doublings
44
45#define u0 rcx
46#define u1 r11
47
48// Macro for the key "multiply and add to (c,h,l)" step
49
50#define combadd(c,h,l,numa,numb) \
51 mov rax, numa; \
52 mul QWORD PTR numb; \
53 add l, rax; \
54 adc h, rdx; \
55 adc c, 0
56
57// Set up initial window (c,h,l) = numa * numb
58
59#define combaddz(c,h,l,numa,numb) \
60 mov rax, numa; \
61 mul QWORD PTR numb; \
62 xor c, c; \
63 mov l, rax; \
64 mov h, rdx
65
66// Doubling step (c,h,l) = 2 * (c,hh,ll) + (0,h,l)
67
68#define doubladd(c,h,l,hh,ll) \
69 add ll, ll; \
70 adc hh, hh; \
71 adc c, c; \
72 add l, ll; \
73 adc h, hh; \
74 adc c, 0
75
76// Square term incorporation (c,h,l) += numa^2
77
78#define combadd1(c,h,l,numa) \
79 mov rax, numa; \
80 mul rax; \
81 add l, rax; \
82 adc h, rdx; \
83 adc c, 0
84
85// A short form where we don't expect a top carry
86
87#define combads(h,l,numa) \
88 mov rax, numa; \
89 mul rax; \
90 add l, rax; \
91 adc h, rdx
92
93// A version doubling directly before adding, for single non-square terms
94
95#define combadd2(c,h,l,numa,numb) \
96 mov rax, numa; \
97 mul QWORD PTR numb; \
98 add rax, rax; \
99 adc rdx, rdx; \
100 adc c, 0; \
101 add l, rax; \
102 adc h, rdx; \
103 adc c, 0
104
105S2N_BN_SYMBOL(bignum_sqr_8_16_alt):
106 _CET_ENDBR
107
108#if WINDOWS_ABI
109 push rdi
110 push rsi
111 mov rdi, rcx
112 mov rsi, rdx
113#endif
114
115// Result term 0
116
117 mov rax, [x]
118 mul rax
119
120 mov [z], rax
121 mov t0, rdx
122 xor t1, t1
123
124// Result term 1
125
126 xor t2, t2
127 combadd2(t2,t1,t0,[x],[x+8])
128 mov [z+8], t0
129
130// Result term 2
131
132 xor t0, t0
133 combadd1(t0,t2,t1,[x+8])
134 combadd2(t0,t2,t1,[x],[x+16])
135 mov [z+16], t1
136
137// Result term 3
138
139 combaddz(t1,u1,u0,[x],[x+24])
140 combadd(t1,u1,u0,[x+8],[x+16])
141 doubladd(t1,t0,t2,u1,u0)
142 mov [z+24], t2
143
144// Result term 4
145
146 combaddz(t2,u1,u0,[x],[x+32])
147 combadd(t2,u1,u0,[x+8],[x+24])
148 doubladd(t2,t1,t0,u1,u0)
149 combadd1(t2,t1,t0,[x+16])
150 mov [z+32], t0
151
152// Result term 5
153
154 combaddz(t0,u1,u0,[x],[x+40])
155 combadd(t0,u1,u0,[x+8],[x+32])
156 combadd(t0,u1,u0,[x+16],[x+24])
157 doubladd(t0,t2,t1,u1,u0)
158 mov [z+40], t1
159
160// Result term 6
161
162 combaddz(t1,u1,u0,[x],[x+48])
163 combadd(t1,u1,u0,[x+8],[x+40])
164 combadd(t1,u1,u0,[x+16],[x+32])
165 doubladd(t1,t0,t2,u1,u0)
166 combadd1(t1,t0,t2,[x+24])
167 mov [z+48], t2
168
169// Result term 7
170
171 combaddz(t2,u1,u0,[x],[x+56])
172 combadd(t2,u1,u0,[x+8],[x+48])
173 combadd(t2,u1,u0,[x+16],[x+40])
174 combadd(t2,u1,u0,[x+24],[x+32])
175 doubladd(t2,t1,t0,u1,u0)
176 mov [z+56], t0
177
178// Result term 8
179
180 combaddz(t0,u1,u0,[x+8],[x+56])
181 combadd(t0,u1,u0,[x+16],[x+48])
182 combadd(t0,u1,u0,[x+24],[x+40])
183 doubladd(t0,t2,t1,u1,u0)
184 combadd1(t0,t2,t1,[x+32])
185 mov [z+64], t1
186
187// Result term 9
188
189 combaddz(t1,u1,u0,[x+16],[x+56])
190 combadd(t1,u1,u0,[x+24],[x+48])
191 combadd(t1,u1,u0,[x+32],[x+40])
192 doubladd(t1,t0,t2,u1,u0)
193 mov [z+72], t2
194
195// Result term 10
196
197 combaddz(t2,u1,u0,[x+24],[x+56])
198 combadd(t2,u1,u0,[x+32],[x+48])
199 doubladd(t2,t1,t0,u1,u0)
200 combadd1(t2,t1,t0,[x+40])
201 mov [z+80], t0
202
203// Result term 11
204
205 combaddz(t0,u1,u0,[x+32],[x+56])
206 combadd(t0,u1,u0,[x+40],[x+48])
207 doubladd(t0,t2,t1,u1,u0)
208 mov [z+88], t1
209
210// Result term 12
211
212 xor t1, t1
213 combadd2(t1,t0,t2,[x+40],[x+56])
214 combadd1(t1,t0,t2,[x+48])
215 mov [z+96], t2
216
217// Result term 13
218
219 xor t2, t2
220 combadd2(t2,t1,t0,[x+48],[x+56])
221 mov [z+104], t0
222
223// Result term 14
224
225 combads(t2,t1,[x+56])
226 mov [z+112], t1
227
228// Result term 15
229
230 mov [z+120], t2
231
232// Return
233
234#if WINDOWS_ABI
235 pop rsi
236 pop rdi
237#endif
238 ret
239
240#if defined(__linux__) && defined(__ELF__)
241.section .note.GNU-stack,"",%progbits
242#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
deleted file mode 100644
index 3ff8a30510..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
+++ /dev/null
@@ -1,153 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Subtract, z := x - y
17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18//
19// extern uint64_t bignum_sub
20// (uint64_t p, uint64_t *z,
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22//
23// Does the z := x - y operation, truncating modulo p words in general and
24// returning a top borrow (0 or 1) in the p'th place, only subtracting input
25// words below p (as well as m and n respectively) to get the diff and borrow.
26//
27// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
28// Microsoft x64 ABI: RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
29// ----------------------------------------------------------------------------
30
31#include "s2n_bignum_internal.h"
32
33 .intel_syntax noprefix
34 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub)
35 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub)
36 .text
37
38#define p rdi
39#define z rsi
40#define m rdx
41#define x rcx
42#define n r8
43#define y r9
44#define i r10
45#define a rax
46
47#define ashort eax
48
49
50
51S2N_BN_SYMBOL(bignum_sub):
52 _CET_ENDBR
53
54#if WINDOWS_ABI
55 push rdi
56 push rsi
57 mov rdi, rcx
58 mov rsi, rdx
59 mov rdx, r8
60 mov rcx, r9
61 mov r8, [rsp+56]
62 mov r9, [rsp+64]
63#endif
64
65// Zero the main index counter for both branches
66
67 xor i, i
68
69// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
70// we'll never need words past the p'th. Can now assume m <= p and n <= p.
71// Then compare the modified m and n and branch accordingly
72
73 cmp p, m
74 cmovc m, p
75 cmp p, n
76 cmovc n, p
77 cmp m, n
78 jc ylonger
79
80// The case where x is longer or of the same size (p >= m >= n)
81
82 sub p, m
83 sub m, n
84 inc m
85 test n, n
86 jz xtest
87xmainloop:
88 mov a, [x+8*i]
89 sbb a, [y+8*i]
90 mov [z+8*i],a
91 inc i
92 dec n
93 jnz xmainloop
94 jmp xtest
95xtoploop:
96 mov a, [x+8*i]
97 sbb a, 0
98 mov [z+8*i],a
99 inc i
100xtest:
101 dec m
102 jnz xtoploop
103 sbb a, a
104 test p, p
105 jz tailskip
106tailloop:
107 mov [z+8*i],a
108 inc i
109 dec p
110 jnz tailloop
111tailskip:
112 neg a
113#if WINDOWS_ABI
114 pop rsi
115 pop rdi
116#endif
117 ret
118
119// The case where y is longer (p >= n > m)
120
121ylonger:
122
123 sub p, n
124 sub n, m
125 test m, m
126 jz ytoploop
127ymainloop:
128 mov a, [x+8*i]
129 sbb a, [y+8*i]
130 mov [z+8*i],a
131 inc i
132 dec m
133 jnz ymainloop
134ytoploop:
135 mov ashort, 0
136 sbb a, [y+8*i]
137 mov [z+8*i],a
138 inc i
139 dec n
140 jnz ytoploop
141 sbb a, a
142 test p, p
143 jnz tailloop
144 neg a
145#if WINDOWS_ABI
146 pop rsi
147 pop rdi
148#endif
149 ret
150
151#if defined(__linux__) && defined(__ELF__)
152.section .note.GNU-stack,"",%progbits
153#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c b/src/lib/libcrypto/bn/arch/amd64/bn_arch.c
deleted file mode 100644
index a377a05681..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c
+++ /dev/null
@@ -1,131 +0,0 @@
1/* $OpenBSD: bn_arch.c,v 1.7 2023/06/24 16:01:44 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#include "bn_arch.h"
21#include "bn_local.h"
22#include "s2n_bignum.h"
23
24#ifdef HAVE_BN_ADD
25BN_ULONG
26bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
27 int b_len)
28{
29 return bignum_add(r_len, (uint64_t *)r, a_len, (uint64_t *)a,
30 b_len, (uint64_t *)b);
31}
32#endif
33
34
35#ifdef HAVE_BN_ADD_WORDS
36BN_ULONG
37bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
38{
39 return bignum_add(n, (uint64_t *)rd, n, (uint64_t *)ad, n,
40 (uint64_t *)bd);
41}
42#endif
43
44#ifdef HAVE_BN_SUB
45BN_ULONG
46bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
47 int b_len)
48{
49 return bignum_sub(r_len, (uint64_t *)r, a_len, (uint64_t *)a,
50 b_len, (uint64_t *)b);
51}
52#endif
53
54#ifdef HAVE_BN_SUB_WORDS
55BN_ULONG
56bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
57{
58 return bignum_sub(n, (uint64_t *)rd, n, (uint64_t *)ad, n,
59 (uint64_t *)bd);
60}
61#endif
62
63#ifdef HAVE_BN_MUL_ADD_WORDS
64BN_ULONG
65bn_mul_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
66{
67 return bignum_cmadd(num, (uint64_t *)rd, w, num, (uint64_t *)ad);
68}
69#endif
70
71#ifdef HAVE_BN_MUL_WORDS
72BN_ULONG
73bn_mul_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
74{
75 return bignum_cmul(num, (uint64_t *)rd, w, num, (uint64_t *)ad);
76}
77#endif
78
79#ifdef HAVE_BN_MUL_COMBA4
80void
81bn_mul_comba4(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd)
82{
83 /* XXX - consider using non-alt on CPUs that have the ADX extension. */
84 bignum_mul_4_8_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd);
85}
86#endif
87
88#ifdef HAVE_BN_MUL_COMBA8
89void
90bn_mul_comba8(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd)
91{
92 /* XXX - consider using non-alt on CPUs that have the ADX extension. */
93 bignum_mul_8_16_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd);
94}
95#endif
96
97#ifdef HAVE_BN_SQR
98int
99bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx)
100{
101 bignum_sqr(r_len, (uint64_t *)r->d, a->top, (uint64_t *)a->d);
102
103 return 1;
104}
105#endif
106
107#ifdef HAVE_BN_SQR_COMBA4
108void
109bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad)
110{
111 /* XXX - consider using non-alt on CPUs that have the ADX extension. */
112 bignum_sqr_4_8_alt((uint64_t *)rd, (uint64_t *)ad);
113}
114#endif
115
116#ifdef HAVE_BN_SQR_COMBA8
117void
118bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad)
119{
120 /* XXX - consider using non-alt on CPUs that have the ADX extension. */
121 bignum_sqr_8_16_alt((uint64_t *)rd, (uint64_t *)ad);
122}
123#endif
124
125#ifdef HAVE_BN_WORD_CLZ
126int
127bn_word_clz(BN_ULONG w)
128{
129 return word_clz(w);
130}
131#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
deleted file mode 100644
index 927cd75208..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
+++ /dev/null
@@ -1,109 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.14 2024/03/26 06:09:25 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#ifndef HEADER_BN_ARCH_H
21#define HEADER_BN_ARCH_H
22
23#ifndef OPENSSL_NO_ASM
24
25#define HAVE_BN_ADD
26#define HAVE_BN_ADD_WORDS
27
28#define HAVE_BN_DIV_WORDS
29
30#define HAVE_BN_MUL_ADD_WORDS
31#define HAVE_BN_MUL_COMBA4
32#define HAVE_BN_MUL_COMBA8
33#define HAVE_BN_MUL_WORDS
34
35#define HAVE_BN_SQR
36#define HAVE_BN_SQR_COMBA4
37#define HAVE_BN_SQR_COMBA8
38
39#define HAVE_BN_SUB
40#define HAVE_BN_SUB_WORDS
41
42#define HAVE_BN_WORD_CLZ
43
44#if defined(__GNUC__)
45
46#define HAVE_BN_DIV_REM_WORDS_INLINE
47
/*
 * bn_div_rem_words_inline() divides the double word h:l by d using a single
 * divq instruction and stores the quotient in *out_q and the remainder in
 * *out_r.
 *
 * Operand mapping: h is loaded into %rdx and l into %rax (inputs "d" and
 * "a"), d may be a register or memory operand ("rm"); the quotient comes
 * back in %rax and the remainder in %rdx. The flags are declared clobbered.
 * NOTE(review): divq traps if d is zero or if the quotient does not fit in
 * one word (h >= d); callers are presumably required to guarantee h < d -
 * confirm.
 */
48static inline void
49bn_div_rem_words_inline(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q,
50 BN_ULONG *out_r)
51{
52 BN_ULONG q, r;
53
54 /*
55 * Unsigned division of %rdx:%rax by d with quotient being stored in
56 * %rax and remainder in %rdx.
57 */
58 __asm__ volatile ("divq %4"
59 : "=a"(q), "=d"(r)
60 : "d"(h), "a"(l), "rm"(d)
61 : "cc");
62
63 *out_q = q;
64 *out_r = r;
65}
66
67#define HAVE_BN_MULW
68
/*
 * bn_mulw() multiplies a by b with a single mulq instruction and stores the
 * high word of the double-word product in *out_r1 and the low word in
 * *out_r0.
 *
 * Operand mapping: a is loaded into %rax (input "a"), b may be a register
 * or memory operand ("rm"); the high word is read back from %rdx and the
 * low word from %rax. The flags are declared clobbered.
 */
69static inline void
70bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
71{
72 BN_ULONG r1, r0;
73
74 /*
75 * Unsigned multiplication of %rax, with the double word result being
76 * stored in %rdx:%rax.
77 */
78 __asm__ ("mulq %3"
79 : "=d"(r1), "=a"(r0)
80 : "a"(a), "rm"(b)
81 : "cc");
82
83 *out_r1 = r1;
84 *out_r0 = r0;
85}
86
87#define HAVE_BN_SUBW
88
/*
 * bn_subw() computes a - b, storing the single-word difference in *out_r0
 * and the borrow (0 or 1) in *out_borrow.
 *
 * Operand mapping: r0 is tied to a through the matching constraint "1", so
 * "subq %3, %1" leaves a - b in r0. "setb" then writes the carry flag into
 * the low byte of borrow, and "and $1" clears the remaining bits of that
 * register, which were never initialised.
 */
89static inline void
90bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
91{
92 BN_ULONG borrow, r0;
93
94 __asm__ (
95 "subq %3, %1 \n"
96 "setb %b0 \n"
97 "and $1, %0 \n"
98 : "=r"(borrow), "=r"(r0)
99 : "1"(a), "rm"(b)
100 : "cc");
101
102 *out_borrow = borrow;
103 *out_r0 = r0;
104}
105
106#endif /* __GNUC__ */
107
108#endif
109#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S
deleted file mode 100644
index 3926fcd4b0..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S
+++ /dev/null
@@ -1,60 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Count leading zero bits in a single word
17// Input a; output function return
18//
19// extern uint64_t word_clz (uint64_t a);
20//
21// Standard x86-64 ABI: RDI = a, returns RAX
22// Microsoft x64 ABI: RCX = a, returns RAX
23// ----------------------------------------------------------------------------
24
25#include "s2n_bignum_internal.h"
26
27 .intel_syntax noprefix
28 S2N_BN_SYM_VISIBILITY_DIRECTIVE(word_clz)
29 S2N_BN_SYM_PRIVACY_DIRECTIVE(word_clz)
30 .text
31
// word_clz: the argument is taken from rdi and the result is returned in
// rax. rdx is used as scratch for the constant 64.
32S2N_BN_SYMBOL(word_clz):
33 _CET_ENDBR
34
// Under the Microsoft x64 ABI the argument arrives in rcx; rdi and rsi are
// saved on the stack and the argument is moved into rdi so the body below
// is shared by both ABIs. (rsi is saved and restored although the body does
// not use it.)
35#if WINDOWS_ABI
36 push rdi
37 push rsi
38 mov rdi, rcx
39#endif
40
41// First do rax = 63 - bsr(a), which is right except (maybe) for zero inputs
// (for a bit index in 0..63, xor with 63 gives the same value as 63 - index)
42
43 bsr rax, rdi
44 xor rax, 63
45
46// Force return of 64 in the zero-input case
// (cmove replaces rax with rdx = 64 only when the test finds rdi == 0)
47
48 mov edx, 64
49 test rdi, rdi
50 cmove rax, rdx
51
52#if WINDOWS_ABI
53 pop rsi
54 pop rdi
55#endif
56 ret
57
58#if defined(__linux__) && defined(__ELF__)
59.section .note.GNU-stack,"",%progbits
60#endif
diff --git a/src/lib/libcrypto/bn/arch/arm/bn_arch.h b/src/lib/libcrypto/bn/arch/arm/bn_arch.h
deleted file mode 100644
index ef9bf7f156..0000000000
--- a/src/lib/libcrypto/bn/arch/arm/bn_arch.h
+++ /dev/null
@@ -1,73 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.2 2023/06/24 15:51:47 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#ifndef HEADER_BN_ARCH_H
21#define HEADER_BN_ARCH_H
22
23#ifndef OPENSSL_NO_ASM
24
25#if defined(__GNUC__)
26
27#define HAVE_BN_SUBW
28
/*
 * bn_subw() computes a - b, storing the single-word difference in *out_r0
 * and the borrow in *out_borrow.
 *
 * The asm zeroes borrow, performs a flag-setting subtract (subs), then uses
 * "sbc borrow, borrow, #0" to turn the resulting carry flag into 0 or -1,
 * and finally negates it so the value written out is 0 or 1.
 * borrow is an early-clobber output ("=&r") because it is written by the
 * first instruction, before a and b have been read.
 */
29static inline void
30bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
31{
32 BN_ULONG borrow, r0;
33
34 __asm__ (
35 "mov %[borrow], #0 \n"
36 "subs %[r0], %[a], %[b] \n"
37 "sbc %[borrow], %[borrow], #0 \n"
38 "neg %[borrow], %[borrow] \n"
39 : [borrow]"=&r"(borrow), [r0]"=r"(r0)
40 : [a]"r"(a), [b]"r"(b)
41 : "cc");
42
43 *out_borrow = borrow;
44 *out_r0 = r0;
45}
46
47#define HAVE_BN_SUBW_SUBW
48
/*
 * bn_subw_subw() computes a - b - c, storing the single-word difference in
 * *out_r0 and the accumulated borrow of the two subtractions in
 * *out_borrow.
 *
 * The asm zeroes borrow, then performs two flag-setting subtracts (subs),
 * each followed by "sbc borrow, borrow, #0" so that every borrow decrements
 * the counter; the final neg makes the count non-negative.
 * Both outputs are early-clobber ("=&r"): borrow is written before any
 * input is read, and r0 is written by the first subs while c is still
 * needed by the second.
 */
49static inline void
50bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
51 BN_ULONG *out_r0)
52{
53 BN_ULONG borrow, r0;
54
55 __asm__ (
56 "mov %[borrow], #0 \n"
57 "subs %[r0], %[a], %[b] \n"
58 "sbc %[borrow], %[borrow], #0 \n"
59 "subs %[r0], %[r0], %[c] \n"
60 "sbc %[borrow], %[borrow], #0 \n"
61 "neg %[borrow], %[borrow] \n"
62 : [borrow]"=&r"(borrow), [r0]"=&r"(r0)
63 : [a]"r"(a), [b]"r"(b), [c]"r"(c)
64 : "cc");
65
66 *out_borrow = borrow;
67 *out_r0 = r0;
68}
69
70#endif /* __GNUC__ */
71
72#endif
73#endif
diff --git a/src/lib/libcrypto/bn/arch/hppa/bn_arch.h b/src/lib/libcrypto/bn/arch/hppa/bn_arch.h
deleted file mode 100644
index 136adf0e97..0000000000
--- a/src/lib/libcrypto/bn/arch/hppa/bn_arch.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:33 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifndef HEADER_BN_ARCH_H
19#define HEADER_BN_ARCH_H
20
21#ifndef OPENSSL_NO_ASM
22
23#endif
24#endif
diff --git a/src/lib/libcrypto/bn/arch/i386/bn_arch.h b/src/lib/libcrypto/bn/arch/i386/bn_arch.h
deleted file mode 100644
index eef519fcc7..0000000000
--- a/src/lib/libcrypto/bn/arch/i386/bn_arch.h
+++ /dev/null
@@ -1,86 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.9 2023/02/16 10:41:03 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#ifndef HEADER_BN_ARCH_H
21#define HEADER_BN_ARCH_H
22
23#ifndef OPENSSL_NO_ASM
24
25#define HAVE_BN_ADD_WORDS
26
27#define HAVE_BN_DIV_WORDS
28
29#define HAVE_BN_MUL_ADD_WORDS
30#define HAVE_BN_MUL_COMBA4
31#define HAVE_BN_MUL_COMBA8
32#define HAVE_BN_MUL_WORDS
33
34#define HAVE_BN_SQR_COMBA4
35#define HAVE_BN_SQR_COMBA8
36#define HAVE_BN_SQR_WORDS
37
38#define HAVE_BN_SUB_WORDS
39
40#if defined(__GNUC__)
41#define HAVE_BN_DIV_REM_WORDS_INLINE
42
/*
 * bn_div_rem_words_inline() divides the double word h:l by d using a single
 * divl instruction and stores the quotient in *out_q and the remainder in
 * *out_r.
 *
 * Operand mapping: l is loaded into %eax and h into %edx (inputs "a" and
 * "d"), d may be a register or memory operand ("rm"); the quotient comes
 * back in %eax and the remainder in %edx. The flags are declared clobbered.
 * NOTE(review): divl traps if d is zero or if the quotient does not fit in
 * one word (h >= d); callers are presumably required to guarantee h < d -
 * confirm.
 */
43static inline void
44bn_div_rem_words_inline(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q,
45 BN_ULONG *out_r)
46{
47 BN_ULONG q, r;
48
49 /*
50 * Unsigned division of %edx:%eax by d with quotient being stored in
51 * %eax and remainder in %edx.
52 */
53 __asm__ volatile ("divl %4"
54 : "=a"(q), "=d"(r)
55 : "a"(l), "d"(h), "rm"(d)
56 : "cc");
57
58 *out_q = q;
59 *out_r = r;
60}
61#endif /* __GNUC__ */
62
63#if defined(__GNUC__)
64#define HAVE_BN_MULW
65
/*
 * bn_mulw() multiplies a by b with a single mull instruction and stores the
 * high word of the double-word product in *out_r1 and the low word in
 * *out_r0.
 *
 * Operand mapping: a is loaded into %eax (input "a"), b may be a register
 * or memory operand ("rm"); the high word is read back from %edx and the
 * low word from %eax. The flags are declared clobbered.
 */
66static inline void
67bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
68{
69 BN_ULONG r1, r0;
70
71 /*
72 * Unsigned multiplication of %eax, with the double word result being
73 * stored in %edx:%eax.
74 */
75 __asm__ ("mull %3"
76 : "=d"(r1), "=a"(r0)
77 : "a"(a), "rm"(b)
78 : "cc");
79
80 *out_r1 = r1;
81 *out_r0 = r0;
82}
83#endif /* __GNUC__ */
84
85#endif
86#endif
diff --git a/src/lib/libcrypto/bn/arch/m88k/bn_arch.h b/src/lib/libcrypto/bn/arch/m88k/bn_arch.h
deleted file mode 100644
index 136adf0e97..0000000000
--- a/src/lib/libcrypto/bn/arch/m88k/bn_arch.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:33 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifndef HEADER_BN_ARCH_H
19#define HEADER_BN_ARCH_H
20
21#ifndef OPENSSL_NO_ASM
22
23#endif
24#endif
diff --git a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h
deleted file mode 100644
index 53771bce1e..0000000000
--- a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h
+++ /dev/null
@@ -1,40 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.7 2023/01/23 12:17:58 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifndef HEADER_BN_ARCH_H
19#define HEADER_BN_ARCH_H
20
21#ifndef OPENSSL_NO_ASM
22
23#define HAVE_BN_ADD_WORDS
24
25#define HAVE_BN_DIV_WORDS
26#define HAVE_BN_DIV_3_WORDS
27
28#define HAVE_BN_MUL_ADD_WORDS
29#define HAVE_BN_MUL_COMBA4
30#define HAVE_BN_MUL_COMBA8
31#define HAVE_BN_MUL_WORDS
32
33#define HAVE_BN_SQR_COMBA4
34#define HAVE_BN_SQR_COMBA8
35#define HAVE_BN_SQR_WORDS
36
37#define HAVE_BN_SUB_WORDS
38
39#endif
40#endif
diff --git a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h
deleted file mode 100644
index 46e932a2d5..0000000000
--- a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.6 2023/01/23 12:17:58 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifndef HEADER_BN_ARCH_H
19#define HEADER_BN_ARCH_H
20
21#ifndef OPENSSL_NO_ASM
22
23#define HAVE_BN_ADD_WORDS
24
25#define HAVE_BN_DIV_WORDS
26
27#define HAVE_BN_MUL_ADD_WORDS
28#define HAVE_BN_MUL_COMBA4
29#define HAVE_BN_MUL_COMBA8
30#define HAVE_BN_MUL_WORDS
31
32#define HAVE_BN_SQR_COMBA4
33#define HAVE_BN_SQR_COMBA8
34#define HAVE_BN_SQR_WORDS
35
36#define HAVE_BN_SUB_WORDS
37
38#endif
39#endif
diff --git a/src/lib/libcrypto/bn/arch/powerpc64/bn_arch.h b/src/lib/libcrypto/bn/arch/powerpc64/bn_arch.h
deleted file mode 100644
index 18bac203eb..0000000000
--- a/src/lib/libcrypto/bn/arch/powerpc64/bn_arch.h
+++ /dev/null
@@ -1,44 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.4 2023/02/16 10:41:03 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifndef HEADER_BN_ARCH_H
19#define HEADER_BN_ARCH_H
20
21#ifndef OPENSSL_NO_ASM
22
23#if 0 /* Needs testing and enabling. */
24#if defined(__GNUC__)
25#define HAVE_BN_MULW
26
/*
 * bn_mulw() multiplies a by b and stores the high word of the double-word
 * product in *out_r1 and the low word in *out_r0.
 *
 * mulhdu produces the high doubleword and mulld the low doubleword. The
 * low half previously used the mnemonic "mul", which is a legacy POWER
 * instruction rather than a 64-bit PowerPC one, so the pair would not have
 * assembled once this disabled block was switched on.
 * r1 is an early-clobber output ("=&r") because it is written by the first
 * instruction while a and b are still needed by the second.
 */
27static inline void
28bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
29{
30 BN_ULONG r1, r0;
31
32 /* Unsigned multiplication using a mulhdu/mulld pair. */
33 __asm__ ("mulhdu %0, %2, %3; mulld %1, %2, %3"
34 : "=&r"(r1), "=r"(r0)
35 : "r"(a), "r"(b));
36
37 *out_r1 = r1;
38 *out_r0 = r0;
39}
40#endif /* __GNUC__ */
41#endif
42
43#endif
44#endif
diff --git a/src/lib/libcrypto/bn/arch/riscv64/bn_arch.h b/src/lib/libcrypto/bn/arch/riscv64/bn_arch.h
deleted file mode 100644
index e67de835cf..0000000000
--- a/src/lib/libcrypto/bn/arch/riscv64/bn_arch.h
+++ /dev/null
@@ -1,86 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.7 2023/07/09 10:37:32 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#ifndef HEADER_BN_ARCH_H
21#define HEADER_BN_ARCH_H
22
23#ifndef OPENSSL_NO_ASM
24
25#if defined(__GNUC__)
26
27#define HAVE_BN_ADDW
28
/*
 * bn_addw() computes a + b, storing the single-word sum in *out_r0 and the
 * carry (0 or 1) in *out_r1.
 *
 * The carry is derived without a flags register: after the add, sltu sets
 * carry to 1 exactly when the sum is smaller than a, i.e. when the addition
 * wrapped. r0 is an early-clobber output ("=&r") because a is read again by
 * the sltu after r0 has been written.
 */
29static inline void
30bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
31{
32 BN_ULONG carry, r0;
33
34 __asm__ (
35 "add %[r0], %[a], %[b] \n"
36 "sltu %[carry], %[r0], %[a] \n"
37 : [carry]"=r"(carry), [r0]"=&r"(r0)
38 : [a]"r"(a), [b]"r"(b));
39
40 *out_r1 = carry;
41 *out_r0 = r0;
42}
43
44#define HAVE_BN_MULW
45
/*
 * bn_mulw() multiplies a by b and stores the high word of the double-word
 * product in *out_r1 and the low word in *out_r0.
 *
 * r1 is an early-clobber output ("=&r") because it is written by mulhu
 * while a and b are still needed by the following mul.
 */
46static inline void
47bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
48{
49 BN_ULONG r1, r0;
50
51 /*
52 * Unsigned multiplication using a mulhu/mul pair. Note that the order
53 * of these instructions is important, as they can potentially be fused
54 * into a single operation.
55 */
56 __asm__ (
57 "mulhu %[r1], %[a], %[b] \n"
58 "mul %[r0], %[a], %[b] \n"
59 : [r1]"=&r"(r1), [r0]"=r"(r0)
60 : [a]"r"(a), [b]"r"(b));
61
62 *out_r1 = r1;
63 *out_r0 = r0;
64}
65
66#define HAVE_BN_SUBW
67
/*
 * bn_subw() computes a - b, storing the single-word difference in *out_r0
 * and the borrow (0 or 1) in *out_borrow.
 *
 * The borrow is derived without a flags register: after the sub, sltu sets
 * borrow to 1 exactly when a is smaller than the difference, i.e. when the
 * subtraction wrapped. r0 is an early-clobber output ("=&r") because a is
 * read again by the sltu after r0 has been written.
 */
68static inline void
69bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
70{
71 BN_ULONG borrow, r0;
72
73 __asm__ (
74 "sub %[r0], %[a], %[b] \n"
75 "sltu %[borrow], %[a], %[r0] \n"
76 : [borrow]"=r"(borrow), [r0]"=&r"(r0)
77 : [a]"r"(a), [b]"r"(b));
78
79 *out_borrow = borrow;
80 *out_r0 = r0;
81}
82
83#endif /* __GNUC__ */
84
85#endif
86#endif
diff --git a/src/lib/libcrypto/bn/arch/sh/bn_arch.h b/src/lib/libcrypto/bn/arch/sh/bn_arch.h
deleted file mode 100644
index 4d6571f9cb..0000000000
--- a/src/lib/libcrypto/bn/arch/sh/bn_arch.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:34 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifndef HEADER_BN_ARCH_H
19#define HEADER_BN_ARCH_H
20
21#ifndef OPENSSL_NO_ASM
22
23#endif
24#endif
diff --git a/src/lib/libcrypto/bn/arch/sparc64/bn_arch.h b/src/lib/libcrypto/bn/arch/sparc64/bn_arch.h
deleted file mode 100644
index 4d6571f9cb..0000000000
--- a/src/lib/libcrypto/bn/arch/sparc64/bn_arch.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:34 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifndef HEADER_BN_ARCH_H
19#define HEADER_BN_ARCH_H
20
21#ifndef OPENSSL_NO_ASM
22
23#endif
24#endif
diff --git a/src/lib/libcrypto/bn/asm/alpha-mont.pl b/src/lib/libcrypto/bn/asm/alpha-mont.pl
deleted file mode 100644
index 874597f1c0..0000000000
--- a/src/lib/libcrypto/bn/asm/alpha-mont.pl
+++ /dev/null
@@ -1,315 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# On 21264 RSA sign performance improves by 70/35/20/15 percent for
11# 512/1024/2048/4096 bit key lengths. This is against vendor compiler
12# instructed to '-tune host' code with in-line assembler. Other
13# benchmarks improve by 15-20%. To anchor it to something else, the
14# code provides approximately the same performance per GHz as AMD64.
15# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
16# difference.
17
# Register assignments for the generated Alpha assembly. Each Perl scalar
# holds a register name that is interpolated into the assembly template
# stored in $code further down. The first six map the bn_mul_mont()
# arguments, in prototype order, onto a0-a5.
18# int bn_mul_mont(
19$rp="a0"; # BN_ULONG *rp,
20$ap="a1"; # const BN_ULONG *ap,
21$bp="a2"; # const BN_ULONG *bp,
22$np="a3"; # const BN_ULONG *np,
23$n0="a4"; # const BN_ULONG *n0,
24$num="a5"; # int num);
25
# Working registers. t0-t12 are used as temporaries; s3-s5 are the registers
# the generated prologue stores on the stack and the epilogue reloads.
26$lo0="t0";
27$hi0="t1";
28$lo1="t2";
29$hi1="t3";
30$aj="t4";
31$bi="t5";
32$nj="t6";
33$tp="t7";
34$alo="t8";
35$ahi="t9";
36$nlo="t10";
37$nhi="t11";
38$tj="t12";
39$i="s3";
40$j="s4";
41$m1="s5";
# What follows is a single heredoc assigned to $code: the complete assembly
# text of bn_mul_mont with the register names above substituted in. Text
# inside the heredoc is emitted verbatim, so its '#' comments end up in the
# generated assembly rather than being Perl comments.
42
43$code=<<___;
44#include <machine/asm.h>
45
46.text
47
48.set noat
49.set noreorder
50
51.globl bn_mul_mont
52.align 5
53.ent bn_mul_mont
54bn_mul_mont:
55 lda sp,-48(sp)
56 stq ra,0(sp)
57 stq s3,8(sp)
58 stq s4,16(sp)
59 stq s5,24(sp)
60 stq fp,32(sp)
61 mov sp,fp
62 .mask 0x0400f000,-48
63 .frame fp,48,ra
64 .prologue 0
65
66 .align 4
67 .set reorder
68 sextl $num,$num
69 mov 0,v0
70 cmplt $num,4,AT
71 bne AT,.Lexit
72
73 ldq $hi0,0($ap) # ap[0]
74 s8addq $num,16,AT
75 ldq $aj,8($ap)
76 subq sp,AT,sp
77 ldq $bi,0($bp) # bp[0]
78 lda AT,-4096(zero) # mov -4096,AT
79 ldq $n0,0($n0)
80 and sp,AT,sp
81
82 mulq $hi0,$bi,$lo0
83 ldq $hi1,0($np) # np[0]
84 umulh $hi0,$bi,$hi0
85 ldq $nj,8($np)
86
87 mulq $lo0,$n0,$m1
88
89 mulq $hi1,$m1,$lo1
90 umulh $hi1,$m1,$hi1
91
92 addq $lo1,$lo0,$lo1
93 cmpult $lo1,$lo0,AT
94 addq $hi1,AT,$hi1
95
96 mulq $aj,$bi,$alo
97 mov 2,$j
98 umulh $aj,$bi,$ahi
99 mov sp,$tp
100
101 mulq $nj,$m1,$nlo
102 s8addq $j,$ap,$aj
103 umulh $nj,$m1,$nhi
104 s8addq $j,$np,$nj
105.align 4
106.L1st:
107 .set noreorder
108 ldq $aj,0($aj)
109 addl $j,1,$j
110 ldq $nj,0($nj)
111 lda $tp,8($tp)
112
113 addq $alo,$hi0,$lo0
114 mulq $aj,$bi,$alo
115 cmpult $lo0,$hi0,AT
116 addq $nlo,$hi1,$lo1
117
118 mulq $nj,$m1,$nlo
119 addq $ahi,AT,$hi0
120 cmpult $lo1,$hi1,v0
121 cmplt $j,$num,$tj
122
123 umulh $aj,$bi,$ahi
124 addq $nhi,v0,$hi1
125 addq $lo1,$lo0,$lo1
126 s8addq $j,$ap,$aj
127
128 umulh $nj,$m1,$nhi
129 cmpult $lo1,$lo0,v0
130 addq $hi1,v0,$hi1
131 s8addq $j,$np,$nj
132
133 stq $lo1,-8($tp)
134 nop
135 unop
136 bne $tj,.L1st
137 .set reorder
138
139 addq $alo,$hi0,$lo0
140 addq $nlo,$hi1,$lo1
141 cmpult $lo0,$hi0,AT
142 cmpult $lo1,$hi1,v0
143 addq $ahi,AT,$hi0
144 addq $nhi,v0,$hi1
145
146 addq $lo1,$lo0,$lo1
147 cmpult $lo1,$lo0,v0
148 addq $hi1,v0,$hi1
149
150 stq $lo1,0($tp)
151
152 addq $hi1,$hi0,$hi1
153 cmpult $hi1,$hi0,AT
154 stq $hi1,8($tp)
155 stq AT,16($tp)
156
157 mov 1,$i
158.align 4
159.Louter:
160 s8addq $i,$bp,$bi
161 ldq $hi0,0($ap)
162 ldq $aj,8($ap)
163 ldq $bi,0($bi)
164 ldq $hi1,0($np)
165 ldq $nj,8($np)
166 ldq $tj,0(sp)
167
168 mulq $hi0,$bi,$lo0
169 umulh $hi0,$bi,$hi0
170
171 addq $lo0,$tj,$lo0
172 cmpult $lo0,$tj,AT
173 addq $hi0,AT,$hi0
174
175 mulq $lo0,$n0,$m1
176
177 mulq $hi1,$m1,$lo1
178 umulh $hi1,$m1,$hi1
179
180 addq $lo1,$lo0,$lo1
181 cmpult $lo1,$lo0,AT
182 mov 2,$j
183 addq $hi1,AT,$hi1
184
185 mulq $aj,$bi,$alo
186 mov sp,$tp
187 umulh $aj,$bi,$ahi
188
189 mulq $nj,$m1,$nlo
190 s8addq $j,$ap,$aj
191 umulh $nj,$m1,$nhi
192.align 4
193.Linner:
194 .set noreorder
195 ldq $tj,8($tp) #L0
196 nop #U1
197 ldq $aj,0($aj) #L1
198 s8addq $j,$np,$nj #U0
199
200 ldq $nj,0($nj) #L0
201 nop #U1
202 addq $alo,$hi0,$lo0 #L1
203 lda $tp,8($tp)
204
205 mulq $aj,$bi,$alo #U1
206 cmpult $lo0,$hi0,AT #L0
207 addq $nlo,$hi1,$lo1 #L1
208 addl $j,1,$j
209
210 mulq $nj,$m1,$nlo #U1
211 addq $ahi,AT,$hi0 #L0
212 addq $lo0,$tj,$lo0 #L1
213 cmpult $lo1,$hi1,v0 #U0
214
215 umulh $aj,$bi,$ahi #U1
216 cmpult $lo0,$tj,AT #L0
217 addq $lo1,$lo0,$lo1 #L1
218 addq $nhi,v0,$hi1 #U0
219
220 umulh $nj,$m1,$nhi #U1
221 s8addq $j,$ap,$aj #L0
222 cmpult $lo1,$lo0,v0 #L1
223 cmplt $j,$num,$tj #U0 # borrow $tj
224
225 addq $hi0,AT,$hi0 #L0
226 addq $hi1,v0,$hi1 #U1
227 stq $lo1,-8($tp) #L1
228 bne $tj,.Linner #U0
229 .set reorder
230
231 ldq $tj,8($tp)
232 addq $alo,$hi0,$lo0
233 addq $nlo,$hi1,$lo1
234 cmpult $lo0,$hi0,AT
235 cmpult $lo1,$hi1,v0
236 addq $ahi,AT,$hi0
237 addq $nhi,v0,$hi1
238
239 addq $lo0,$tj,$lo0
240 cmpult $lo0,$tj,AT
241 addq $hi0,AT,$hi0
242
243 ldq $tj,16($tp)
244 addq $lo1,$lo0,$j
245 cmpult $j,$lo0,v0
246 addq $hi1,v0,$hi1
247
248 addq $hi1,$hi0,$lo1
249 stq $j,0($tp)
250 cmpult $lo1,$hi0,$hi1
251 addq $lo1,$tj,$lo1
252 cmpult $lo1,$tj,AT
253 addl $i,1,$i
254 addq $hi1,AT,$hi1
255 stq $lo1,8($tp)
256 cmplt $i,$num,$tj # borrow $tj
257 stq $hi1,16($tp)
258 bne $tj,.Louter
259
260 s8addq $num,sp,$tj # &tp[num]
261 mov $rp,$bp # put rp aside
262 mov sp,$tp
263 mov sp,$ap
264 mov 0,$hi0 # clear borrow bit
265
266.align 4
267.Lsub: ldq $lo0,0($tp)
268 ldq $lo1,0($np)
269 lda $tp,8($tp)
270 lda $np,8($np)
271 subq $lo0,$lo1,$lo1 # tp[i]-np[i]
272 cmpult $lo0,$lo1,AT
273 subq $lo1,$hi0,$lo0
274 cmpult $lo1,$lo0,$hi0
275 or $hi0,AT,$hi0
276 stq $lo0,0($rp)
277 cmpult $tp,$tj,v0
278 lda $rp,8($rp)
279 bne v0,.Lsub
280
281 subq $hi1,$hi0,$hi0 # handle upmost overflow bit
282 mov sp,$tp
283 mov $bp,$rp # restore rp
284
285 and sp,$hi0,$ap
286 bic $bp,$hi0,$bp
287 bis $bp,$ap,$ap # ap=borrow?tp:rp
288
289.align 4
290.Lcopy: ldq $aj,0($ap) # copy or in-place refresh
291 lda $tp,8($tp)
292 lda $rp,8($rp)
293 lda $ap,8($ap)
294 stq zero,-8($tp) # zap tp
295 cmpult $tp,$tj,AT
296 stq $aj,-8($rp)
297 bne AT,.Lcopy
298 mov 1,v0
299
300.Lexit:
301 .set noreorder
302 mov fp,sp
303 /*ldq ra,0(sp)*/
304 ldq s3,8(sp)
305 ldq s4,16(sp)
306 ldq s5,24(sp)
307 ldq fp,32(sp)
308 lda sp,48(sp)
309 ret (ra)
310.end bn_mul_mont
311.align 2
312___
313
# Write the assembled template to standard output and close the handle so
# the output is flushed before the script exits.
314print $code;
315close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/armv4-mont.pl b/src/lib/libcrypto/bn/asm/armv4-mont.pl
deleted file mode 100644
index f78a8b5f0f..0000000000
--- a/src/lib/libcrypto/bn/asm/armv4-mont.pl
+++ /dev/null
@@ -1,204 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# January 2007.
11
12# Montgomery multiplication for ARMv4.
13#
14# Performance improvement naturally varies among CPU implementations
15# and compilers. The code was observed to provide +65-35% improvement
16# [depending on key length, less for longer keys] on ARM920T, and
17# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
18# base and compiler generated code with in-lined umull and even umlal
19# instructions. The latter means that this code didn't really have an
20# "advantage" of utilizing some "secret" instruction.
21#
22# The code is interoperable with Thumb ISA and is rather compact, less
23# than 1/2KB. Windows CE port would be trivial, as it's exclusively
24# about decorations, ABI and instruction syntax are identical.
25
# Consume command-line arguments until one looks like a file name
# (word characters, a dot, an extension) and redirect STDOUT to it.
# NOTE(review): the result of open is not checked, and if no argument
# matches, $output ends up false - confirm what the build relies on here.
26while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
27open STDOUT,">$output";
28
# Register assignments interpolated into the assembly template in $code.
# $bp, $bi and $rp deliberately share r2.
29$num="r0"; # starts as num argument, but holds &tp[num-1]
30$ap="r1";
31$bp="r2"; $bi="r2"; $rp="r2";
32$np="r3";
33$tp="r4";
34$aj="r5";
35$nj="r6";
36$tj="r7";
37$n0="r8";
38########### # r9 is reserved by ELF as platform specific, e.g. TLS pointer
39$alo="r10"; # sl, gcc uses it to keep @GOT
40$ahi="r11"; # fp
41$nlo="r12"; # ip
42########### # r13 is stack pointer
43$nhi="r14"; # lr
44########### # r15 is program counter
45
# The $_xx scalars below are not registers: each is a "base,#offset" operand
# string, used inside [...] in the template to address a saved argument.
46#### argument block layout relative to &tp[num-1], a.k.a. $num
47$_rp="$num,#12*4";
48# ap permanently resides in r1
49$_bp="$num,#13*4";
50# np permanently resides in r3
51$_n0="$num,#14*4";
52$_num="$num,#15*4"; $_bpend=$_num;
53
54$code=<<___;
55.text
56
57.global bn_mul_mont
58.type bn_mul_mont,%function
59
60.align 2
61bn_mul_mont:
62 stmdb sp!,{r0,r2} @ sp points at argument block
63 ldr $num,[sp,#3*4] @ load num
64 cmp $num,#2
65 movlt r0,#0
66 addlt sp,sp,#2*4
67 blt .Labrt
68
69 stmdb sp!,{r4-r12,lr} @ save 10 registers
70
71 mov $num,$num,lsl#2 @ rescale $num for byte count
72 sub sp,sp,$num @ alloca(4*num)
73 sub sp,sp,#4 @ +extra dword
74 sub $num,$num,#4 @ "num=num-1"
75 add $tp,$bp,$num @ &bp[num-1]
76
77 add $num,sp,$num @ $num to point at &tp[num-1]
78 ldr $n0,[$_n0] @ &n0
79 ldr $bi,[$bp] @ bp[0]
80 ldr $aj,[$ap],#4 @ ap[0],ap++
81 ldr $nj,[$np],#4 @ np[0],np++
82 ldr $n0,[$n0] @ *n0
83 str $tp,[$_bpend] @ save &bp[num]
84
85 umull $alo,$ahi,$aj,$bi @ ap[0]*bp[0]
86 str $n0,[$_n0] @ save n0 value
87 mul $n0,$alo,$n0 @ "tp[0]"*n0
88 mov $nlo,#0
89 umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"t[0]"
90 mov $tp,sp
91
92.L1st:
93 ldr $aj,[$ap],#4 @ ap[j],ap++
94 mov $alo,$ahi
95 ldr $nj,[$np],#4 @ np[j],np++
96 mov $ahi,#0
97 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0]
98 mov $nhi,#0
99 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
100 adds $nlo,$nlo,$alo
101 str $nlo,[$tp],#4 @ tp[j-1]=,tp++
102 adc $nlo,$nhi,#0
103 cmp $tp,$num
104 bne .L1st
105
106 adds $nlo,$nlo,$ahi
107 ldr $tp,[$_bp] @ restore bp
108 mov $nhi,#0
109 ldr $n0,[$_n0] @ restore n0
110 adc $nhi,$nhi,#0
111 str $nlo,[$num] @ tp[num-1]=
112 str $nhi,[$num,#4] @ tp[num]=
113
114.Louter:
115 sub $tj,$num,sp @ "original" $num-1 value
116 sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
117 ldr $bi,[$tp,#4]! @ *(++bp)
118 sub $np,$np,$tj @ "rewind" np to &np[1]
119 ldr $aj,[$ap,#-4] @ ap[0]
120 ldr $alo,[sp] @ tp[0]
121 ldr $nj,[$np,#-4] @ np[0]
122 ldr $tj,[sp,#4] @ tp[1]
123
124 mov $ahi,#0
125 umlal $alo,$ahi,$aj,$bi @ ap[0]*bp[i]+tp[0]
126 str $tp,[$_bp] @ save bp
127 mul $n0,$alo,$n0
128 mov $nlo,#0
129 umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"tp[0]"
130 mov $tp,sp
131
132.Linner:
133 ldr $aj,[$ap],#4 @ ap[j],ap++
134 adds $alo,$ahi,$tj @ +=tp[j]
135 ldr $nj,[$np],#4 @ np[j],np++
136 mov $ahi,#0
137 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i]
138 mov $nhi,#0
139 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
140 adc $ahi,$ahi,#0
141 ldr $tj,[$tp,#8] @ tp[j+1]
142 adds $nlo,$nlo,$alo
143 str $nlo,[$tp],#4 @ tp[j-1]=,tp++
144 adc $nlo,$nhi,#0
145 cmp $tp,$num
146 bne .Linner
147
148 adds $nlo,$nlo,$ahi
149 mov $nhi,#0
150 ldr $tp,[$_bp] @ restore bp
151 adc $nhi,$nhi,#0
152 ldr $n0,[$_n0] @ restore n0
153 adds $nlo,$nlo,$tj
154 ldr $tj,[$_bpend] @ restore &bp[num]
155 adc $nhi,$nhi,#0
156 str $nlo,[$num] @ tp[num-1]=
157 str $nhi,[$num,#4] @ tp[num]=
158
159 cmp $tp,$tj
160 bne .Louter
161
162 ldr $rp,[$_rp] @ pull rp
163 add $num,$num,#4 @ $num to point at &tp[num]
164 sub $aj,$num,sp @ "original" num value
165 mov $tp,sp @ "rewind" $tp
166 mov $ap,$tp @ "borrow" $ap
167 sub $np,$np,$aj @ "rewind" $np to &np[0]
168
169 subs $tj,$tj,$tj @ "clear" carry flag
170.Lsub: ldr $tj,[$tp],#4
171 ldr $nj,[$np],#4
172 sbcs $tj,$tj,$nj @ tp[j]-np[j]
173 str $tj,[$rp],#4 @ rp[j]=
174 teq $tp,$num @ preserve carry
175 bne .Lsub
176 sbcs $nhi,$nhi,#0 @ upmost carry
177 mov $tp,sp @ "rewind" $tp
178 sub $rp,$rp,$aj @ "rewind" $rp
179
180 and $ap,$tp,$nhi
181 bic $np,$rp,$nhi
182 orr $ap,$ap,$np @ ap=borrow?tp:rp
183
184.Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh
185 str sp,[$tp],#4 @ zap tp
186 str $tj,[$rp],#4
187 cmp $tp,$num
188 bne .Lcopy
189
190 add sp,$num,#4 @ skip over tp[num+1]
191 ldmia sp!,{r4-r12,lr} @ restore registers
192 add sp,sp,#2*4 @ skip over {r0,r2}
193 mov r0,#1
194.Labrt: tst lr,#1
195 moveq pc,lr @ be binary compatible with V4, yet
196 bx lr @ interoperable with Thumb ISA:-)
197.size bn_mul_mont,.-bn_mul_mont
198.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
199.align 2
200___
201
# Post-process the template: every "bx lr" is replaced by a .word directive
# carrying the literal 0xe12fff1e, then the result is written to STDOUT
# (redirected to the output file earlier in the script) and the handle is
# closed.
202$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
203print $code;
204close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
deleted file mode 100644
index 71b775af8d..0000000000
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ /dev/null
@@ -1,567 +0,0 @@
1#!/usr/local/bin/perl
# Driver: loads the perlasm helpers from x86asm.pl and calls one generator sub
# per word-level bignum primitive defined further down in this script.
2
# Take the directory part of $0 so the helper directories can be put on @INC.
3$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
4push(@INC,"${dir}","${dir}../../perlasm");
5require "x86asm.pl";
6
7&asm_init($ARGV[0],$0);
8
# The SSE2 variants are generated only when -DOPENSSL_IA32_SSE2 is present
# among the command-line arguments.
9$sse2=0;
10for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
11
12&external_label("OPENSSL_ia32cap_P") if ($sse2);
13
# The string argument is handed to function_begin/function_begin_B inside each
# sub (presumably it becomes the emitted symbol name -- confirm in x86asm.pl).
14&bn_mul_add_words("bn_mul_add_words");
15&bn_mul_words("bn_mul_words");
16&bn_sqr_words("bn_sqr_words");
17&bn_div_words("bn_div_words");
18&bn_add_words("bn_add_words");
19&bn_sub_words("bn_sub_words");
20
21&asm_finish();
22
# bn_mul_add_words($name): generate the multiply-accumulate routine.
# Per the inline comments, the emitted code performs r[i] += a[i]*w with carry
# propagation and returns the final carry in eax. Arguments are fetched as
# wparam(0)=r, wparam(1)=a, wparam(2)=num, wparam(3)=w.
# When $sse2 is set, an MMX/SSE2 variant (8-way unrolled block plus a one-word
# loop) is emitted first, guarded at run time by a bit test on
# OPENSSL_ia32cap_P; the integer mul/adc variant follows and is always emitted.
23sub bn_mul_add_words
24 {
25 local($name)=@_;
26
27 &function_begin_B($name,"");
28
29 $r="eax";
30 $a="edx";
31 $c="ecx";
32
33 if ($sse2) {
34 &picsetup("eax");
35 &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
36 &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
37 &jnc(&label("maw_non_sse2"));
38
39 &mov($r,&wparam(0));
40 &mov($a,&wparam(1));
41 &mov($c,&wparam(2));
42 &movd("mm0",&wparam(3)); # mm0 = w
43 &pxor("mm1","mm1"); # mm1 = carry_in
44 &jmp(&label("maw_sse2_entry"));
45
 # 8 words per pass; mm1 carries the running 64-bit sum whose low half is
 # stored and whose high half (psrlq 32) becomes the next carry.
46 &set_label("maw_sse2_unrolled",16);
47 &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
48 &paddq("mm1","mm3"); # mm1 = carry_in + r[0]
49 &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
50 &pmuludq("mm2","mm0"); # mm2 = w*a[0]
51 &movd("mm4",&DWP(4,$a,"",0)); # mm4 = a[1]
52 &pmuludq("mm4","mm0"); # mm4 = w*a[1]
53 &movd("mm6",&DWP(8,$a,"",0)); # mm6 = a[2]
54 &pmuludq("mm6","mm0"); # mm6 = w*a[2]
55 &movd("mm7",&DWP(12,$a,"",0)); # mm7 = a[3]
56 &pmuludq("mm7","mm0"); # mm7 = w*a[3]
57 &paddq("mm1","mm2"); # mm1 = carry_in + r[0] + w*a[0]
58 &movd("mm3",&DWP(4,$r,"",0)); # mm3 = r[1]
59 &paddq("mm3","mm4"); # mm3 = r[1] + w*a[1]
60 &movd("mm5",&DWP(8,$r,"",0)); # mm5 = r[2]
61 &paddq("mm5","mm6"); # mm5 = r[2] + w*a[2]
62 &movd("mm4",&DWP(12,$r,"",0)); # mm4 = r[3]
63 &paddq("mm7","mm4"); # mm7 = r[3] + w*a[3]
64 &movd(&DWP(0,$r,"",0),"mm1");
65 &movd("mm2",&DWP(16,$a,"",0)); # mm2 = a[4]
66 &pmuludq("mm2","mm0"); # mm2 = w*a[4]
67 &psrlq("mm1",32); # mm1 = carry0
68 &movd("mm4",&DWP(20,$a,"",0)); # mm4 = a[5]
69 &pmuludq("mm4","mm0"); # mm4 = w*a[5]
70 &paddq("mm1","mm3"); # mm1 = carry0 + r[1] + w*a[1]
71 &movd("mm6",&DWP(24,$a,"",0)); # mm6 = a[6]
72 &pmuludq("mm6","mm0"); # mm6 = w*a[6]
73 &movd(&DWP(4,$r,"",0),"mm1");
74 &psrlq("mm1",32); # mm1 = carry1
75 &movd("mm3",&DWP(28,$a,"",0)); # mm3 = a[7]
76 &add($a,32);
77 &pmuludq("mm3","mm0"); # mm3 = w*a[7]
78 &paddq("mm1","mm5"); # mm1 = carry1 + r[2] + w*a[2]
79 &movd("mm5",&DWP(16,$r,"",0)); # mm5 = r[4]
80 &paddq("mm2","mm5"); # mm2 = r[4] + w*a[4]
81 &movd(&DWP(8,$r,"",0),"mm1");
82 &psrlq("mm1",32); # mm1 = carry2
83 &paddq("mm1","mm7"); # mm1 = carry2 + r[3] + w*a[3]
84 &movd("mm5",&DWP(20,$r,"",0)); # mm5 = r[5]
85 &paddq("mm4","mm5"); # mm4 = r[5] + w*a[5]
86 &movd(&DWP(12,$r,"",0),"mm1");
87 &psrlq("mm1",32); # mm1 = carry3
88 &paddq("mm1","mm2"); # mm1 = carry3 + r[4] + w*a[4]
89 &movd("mm5",&DWP(24,$r,"",0)); # mm5 = r[6]
90 &paddq("mm6","mm5"); # mm6 = r[6] + w*a[6]
91 &movd(&DWP(16,$r,"",0),"mm1");
92 &psrlq("mm1",32); # mm1 = carry4
93 &paddq("mm1","mm4"); # mm1 = carry4 + r[5] + w*a[5]
94 &movd("mm5",&DWP(28,$r,"",0)); # mm5 = r[7]
95 &paddq("mm3","mm5"); # mm3 = r[7] + w*a[7]
96 &movd(&DWP(20,$r,"",0),"mm1");
97 &psrlq("mm1",32); # mm1 = carry5
98 &paddq("mm1","mm6"); # mm1 = carry5 + r[6] + w*a[6]
99 &movd(&DWP(24,$r,"",0),"mm1");
100 &psrlq("mm1",32); # mm1 = carry6
101 &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
102 &movd(&DWP(28,$r,"",0),"mm1");
103 &lea($r,&DWP(32,$r));
104 &psrlq("mm1",32); # mm1 = carry_out
105
 # 8 words consumed; leave when the count reaches zero.
106 &sub($c,8);
107 &jz(&label("maw_sse2_exit"));
108 &set_label("maw_sse2_entry");
 # Take the unrolled block while any bit above the low three is set in the
 # remaining count; otherwise fall into the one-word loop.
 # NOTE(review): a count of 0 on entry also falls into the one-word loop,
 # which runs its body before testing -- looks like num > 0 is assumed;
 # confirm against callers.
109 &test($c,0xfffffff8);
110 &jnz(&label("maw_sse2_unrolled"));
111
112 &set_label("maw_sse2_loop",4);
113 &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
114 &movd("mm3",&DWP(0,$r)); # mm3 = r[i]
115 &pmuludq("mm2","mm0"); # a[i] *= w
116 &lea($a,&DWP(4,$a));
117 &paddq("mm1","mm3"); # carry += r[i]
118 &paddq("mm1","mm2"); # carry += a[i]*w
119 &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
120 &sub($c,1);
121 &psrlq("mm1",32); # carry = carry_high
122 &lea($r,&DWP(4,$r));
123 &jnz(&label("maw_sse2_loop"));
124 &set_label("maw_sse2_exit");
125 &movd("eax","mm1"); # c = carry_out
126 &emms();
127 &ret();
128
129 &set_label("maw_non_sse2",16);
130 }
131
 # Integer path. The four pushes are done by hand here because the sub was
 # opened with function_begin_B.
132 # function_begin prologue
133 &push("ebp");
134 &push("ebx");
135 &push("esi");
136 &push("edi");
137
138 &comment("");
139 $Low="eax";
140 $High="edx";
141 $a="ebx";
142 $w="ebp";
143 $r="edi";
144 $c="esi";
145
146 &xor($c,$c); # clear carry
147 &mov($r,&wparam(0)); #
148
149 &mov("ecx",&wparam(2)); #
150 &mov($a,&wparam(1)); #
151
152 &and("ecx",0xfffffff8); # ecx = num & ~7 (words handled by the unrolled loop)
153 &mov($w,&wparam(3)); #
154
155 &push("ecx"); # Up the stack for a tmp variable
156
 # The jz consumes the flags left by the and above; the mov and push emitted
 # in between do not modify flags.
157 &jz(&label("maw_finish"));
158
159 &set_label("maw_loop",16);
160
 # Eight rounds per pass; $i is the byte offset of the word (0,4,...,28).
161 for ($i=0; $i<32; $i+=4)
162 {
163 &comment("Round $i");
164
165 &mov("eax",&DWP($i,$a)); # *a
166 &mul($w); # *a * w
167 &add("eax",$c); # L(t)+= c
168 &adc("edx",0); # H(t)+=carry
169 &add("eax",&DWP($i,$r)); # L(t)+= *r
170 &adc("edx",0); # H(t)+=carry
171 &mov(&DWP($i,$r),"eax"); # *r= L(t);
172 &mov($c,"edx"); # c= H(t);
173 }
174
175 &comment("");
 # lea advances the pointers without touching the flags from sub.
176 &sub("ecx",8);
177 &lea($a,&DWP(32,$a));
178 &lea($r,&DWP(32,$r));
179 &jnz(&label("maw_loop"));
180
181 &set_label("maw_finish",0);
182 &mov("ecx",&wparam(2)); # get num
183 &and("ecx",7);
184 &jnz(&label("maw_finish2")); # helps branch prediction
185 &jmp(&label("maw_end"));
186
 # Tail: up to 7 leftover words. ecx counts down; every round but the last
 # emits a dec/jz pair so the sequence exits as soon as the count hits zero.
187 &set_label("maw_finish2",1);
188 for ($i=0; $i<7; $i++)
189 {
190 &comment("Tail Round $i");
191 &mov("eax",&DWP($i*4,$a)); # *a
192 &mul($w); # *a * w
193 &add("eax",$c); # L(t)+=c
194 &adc("edx",0); # H(t)+=carry
195 &add("eax",&DWP($i*4,$r)); # L(t)+= *r
196 &adc("edx",0); # H(t)+=carry
197 &dec("ecx") if ($i != 7-1);
198 &mov(&DWP($i*4,$r),"eax"); # *r= L(t);
199 &mov($c,"edx"); # c= H(t);
200 &jz(&label("maw_end")) if ($i != 7-1);
201 }
202 &set_label("maw_end",0);
203 &mov("eax",$c);
204
205 &pop("ecx"); # drop the tmp variable pushed above
206
207 &function_end($name);
208 }
209
# bn_mul_words($name): generate the plain multiply routine.
# Per the inline comments, the emitted code stores the low word of
# a[i]*w + carry into r[i] and returns the final carry in eax. Arguments are
# fetched as wparam(0)=r, wparam(1)=a, wparam(2)=num, wparam(3)=w.
# With $sse2 set, a one-word-per-iteration MMX/SSE2 loop is emitted first,
# guarded at run time by a bit test on OPENSSL_ia32cap_P.
210sub bn_mul_words
211 {
212 local($name)=@_;
213
214 &function_begin_B($name,"");
215
216 $r="eax";
217 $a="edx";
218 $c="ecx";
219
220 if ($sse2) {
221 &picsetup("eax");
222 &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
223 &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
224 &jnc(&label("mw_non_sse2"));
225
226 &mov($r,&wparam(0));
227 &mov($a,&wparam(1));
228 &mov($c,&wparam(2));
229 &movd("mm0",&wparam(3)); # mm0 = w
230 &pxor("mm1","mm1"); # mm1 = carry = 0
231
 # NOTE(review): the loop body runs before the count is tested, so a count
 # of 0 is not handled here -- looks like num > 0 is assumed; confirm.
232 &set_label("mw_sse2_loop",16);
233 &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
234 &pmuludq("mm2","mm0"); # a[i] *= w
235 &lea($a,&DWP(4,$a));
236 &paddq("mm1","mm2"); # carry += a[i]*w
237 &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
238 &sub($c,1);
239 &psrlq("mm1",32); # carry = carry_high
240 &lea($r,&DWP(4,$r));
241 &jnz(&label("mw_sse2_loop"));
242
243 &movd("eax","mm1"); # return carry
244 &emms();
245 &ret();
246 &set_label("mw_non_sse2",16);
247 }
248
 # Integer path; registers are pushed by hand because the sub was opened with
 # function_begin_B.
249 # function_begin prologue
250 &push("ebp");
251 &push("ebx");
252 &push("esi");
253 &push("edi");
254
255 &comment("");
256 $Low="eax";
257 $High="edx";
258 $a="ebx";
259 $w="ecx";
260 $r="edi";
261 $c="esi";
262 $num="ebp";
263
264 &xor($c,$c); # clear carry
265 &mov($r,&wparam(0)); #
266 &mov($a,&wparam(1)); #
267 &mov($num,&wparam(2)); #
268 &mov($w,&wparam(3)); #
269
270 &and($num,0xfffffff8); # num & ~7 (words handled by the unrolled loop)
271 &jz(&label("mw_finish"));
272
273 &set_label("mw_loop",0);
 # Eight rounds per pass; $i is the byte offset of the word (0,4,...,28).
274 for ($i=0; $i<32; $i+=4)
275 {
276 &comment("Round $i");
277
278 &mov("eax",&DWP($i,$a,"",0)); # *a
279 &mul($w); # *a * w
280 &add("eax",$c); # L(t)+=c
281 # XXX
282
283 &adc("edx",0); # H(t)+=carry
284 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
285
286 &mov($c,"edx"); # c= H(t);
287 }
288
289 &comment("");
290 &add($a,32);
291 &add($r,32);
292 &sub($num,8);
293 &jz(&label("mw_finish"));
294 &jmp(&label("mw_loop"));
295
296 &set_label("mw_finish",0);
297 &mov($num,&wparam(2)); # get num
298 &and($num,7);
299 &jnz(&label("mw_finish2"));
300 &jmp(&label("mw_end"));
301
 # Tail: up to 7 leftover words; every round but the last emits a dec/jz
 # pair so the sequence exits as soon as the count hits zero.
302 &set_label("mw_finish2",1);
303 for ($i=0; $i<7; $i++)
304 {
305 &comment("Tail Round $i");
306 &mov("eax",&DWP($i*4,$a,"",0));# *a
307 &mul($w); # *a * w
308 &add("eax",$c); # L(t)+=c
309 # XXX
310 &adc("edx",0); # H(t)+=carry
311 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
312 &mov($c,"edx"); # c= H(t);
313 &dec($num) if ($i != 7-1);
314 &jz(&label("mw_end")) if ($i != 7-1);
315 }
316 &set_label("mw_end",0);
317 &mov("eax",$c);
318
319 &function_end($name);
320 }
321
# bn_sqr_words($name): generate the word-squaring routine.
# The emitted code squares each input word a[i] and stores the double-width
# product as two consecutive words of r (low word first), so r advances two
# words for every word of a. Arguments are fetched as wparam(0)=r,
# wparam(1)=a, wparam(2)=num. No carry is propagated between words.
# With $sse2 set, an MMX/SSE2 loop is emitted first, guarded at run time by a
# bit test on OPENSSL_ia32cap_P.
322sub bn_sqr_words
323 {
324 local($name)=@_;
325
326 &function_begin_B($name,"");
327
328 $r="eax";
329 $a="edx";
330 $c="ecx";
331
332 if ($sse2) {
333 &picsetup("eax");
334 &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
335 &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
336 &jnc(&label("sqr_non_sse2"));
337
338 &mov($r,&wparam(0));
339 &mov($a,&wparam(1));
340 &mov($c,&wparam(2));
341
 # NOTE(review): the loop body runs before the count is tested, so a count
 # of 0 is not handled here -- looks like num > 0 is assumed; confirm.
342 &set_label("sqr_sse2_loop",16);
343 &movd("mm0",&DWP(0,$a)); # mm0 = a[i]
344 &pmuludq("mm0","mm0"); # a[i] *= a[i]
345 &lea($a,&DWP(4,$a)); # a++
346 &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i]
347 &sub($c,1);
348 &lea($r,&DWP(8,$r)); # r += 2
349 &jnz(&label("sqr_sse2_loop"));
350
351 &emms();
352 &ret();
353 &set_label("sqr_non_sse2",16);
354 }
355
 # Integer path; registers are pushed by hand because the sub was opened with
 # function_begin_B.
356 # function_begin prologue
357 &push("ebp");
358 &push("ebx");
359 &push("esi");
360 &push("edi");
361
362 &comment("");
363 $r="esi";
364 $a="edi";
365 $num="ebx";
366
367 &mov($r,&wparam(0)); #
368 &mov($a,&wparam(1)); #
369 &mov($num,&wparam(2)); #
370
371 &and($num,0xfffffff8); # num & ~7 (words handled by the unrolled loop)
372 &jz(&label("sw_finish"));
373
374 &set_label("sw_loop",0);
 # $i is the byte offset into a; the result lands at byte offset 2*$i in r.
375 for ($i=0; $i<32; $i+=4)
376 {
377 &comment("Round $i");
378 &mov("eax",&DWP($i,$a,"",0)); # *a
379 # XXX
380 &mul("eax"); # *a * *a
381 &mov(&DWP($i*2,$r,"",0),"eax"); # low word of the square
382 &mov(&DWP($i*2+4,$r,"",0),"edx");# high word of the square
383 }
384
385 &comment("");
386 &add($a,32);
387 &add($r,64);
388 &sub($num,8);
389 &jnz(&label("sw_loop"));
390
391 &set_label("sw_finish",0);
392 &mov($num,&wparam(2)); # get num
393 &and($num,7);
394 &jz(&label("sw_end"));
395
 # Tail: up to 7 leftover words; here $i is a word index, not a byte offset.
 # The mov between dec and jz does not modify flags.
396 for ($i=0; $i<7; $i++)
397 {
398 &comment("Tail Round $i");
399 &mov("eax",&DWP($i*4,$a,"",0)); # *a
400 # XXX
401 &mul("eax"); # *a * *a
402 &mov(&DWP($i*8,$r,"",0),"eax"); # low word of the square
403 &dec($num) if ($i != 7-1);
404 &mov(&DWP($i*8+4,$r,"",0),"edx");
405 &jz(&label("sw_end")) if ($i != 7-1);
406 }
407 &set_label("sw_end",0);
408
409 &function_end($name);
410 }
411
# bn_div_words($name): generate a routine that loads wparam(0) into edx,
# wparam(1) into eax and wparam(2) into ecx, then executes a single
# "div ecx". On x86 that divides the 64-bit value edx:eax by ecx, leaving the
# quotient in eax and the remainder in edx. The routine returns right after.
# No registers are saved: only eax, ecx and edx are written.
412sub bn_div_words
413 {
414 local($name)=@_;
415
416 &function_begin_B($name,"");
417 &mov("edx",&wparam(0)); # high word of the dividend
418 &mov("eax",&wparam(1)); # low word of the dividend
419 &mov("ecx",&wparam(2)); # divisor
420 &div("ecx");
421 &ret();
422 &function_end_B($name);
423 }
424
# bn_add_words($name): generate the multi-word addition routine.
# The emitted code stores a[i] + b[i] + carry into r[i] for num words and
# leaves the final carry in eax ($c is eax). Arguments are fetched as
# wparam(0)=r, wparam(1)=a, wparam(2)=b, wparam(3)=num.
# Words are handled in groups of eight, followed by a tail of up to seven.
425sub bn_add_words
426 {
427 local($name)=@_;
428
429 &function_begin($name,"");
430
431 &comment("");
432 $a="esi";
433 $b="edi";
434 $c="eax";
435 $r="ebx";
436 $tmp1="ecx";
437 $tmp2="edx";
438 $num="ebp";
439
440 &mov($r,&wparam(0)); # get r
441 &mov($a,&wparam(1)); # get a
442 &mov($b,&wparam(2)); # get b
443 &mov($num,&wparam(3)); # get num
444 &xor($c,$c); # clear carry
445 &and($num,0xfffffff8); # num & ~7 (words handled by the unrolled loop)
446
447 &jz(&label("aw_finish"));
448
449 &set_label("aw_loop",0);
 # Per word: add the incoming carry, capture the carry-out in $c (mov does
 # not modify flags, so adc $c,$c yields 0+0+CF), add b, then fold the
 # second carry-out into $c.
450 for ($i=0; $i<8; $i++)
451 {
452 &comment("Round $i");
453
454 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
455 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
456 &add($tmp1,$c);
457 &mov($c,0);
458 &adc($c,$c);
459 &add($tmp1,$tmp2);
460 &adc($c,0);
461 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
462 }
463
464 &comment("");
465 &add($a,32);
466 &add($b,32);
467 &add($r,32);
468 &sub($num,8);
469 &jnz(&label("aw_loop"));
470
471 &set_label("aw_finish",0);
472 &mov($num,&wparam(3)); # get num
473 &and($num,7);
474 &jz(&label("aw_end"));
475
 # Tail: up to 7 leftover words; every round but the last emits a dec/jz
 # pair (the mov in between leaves flags alone) to exit when the count hits
 # zero.
476 for ($i=0; $i<7; $i++)
477 {
478 &comment("Tail Round $i");
479 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
480 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
481 &add($tmp1,$c);
482 &mov($c,0);
483 &adc($c,$c);
484 &add($tmp1,$tmp2);
485 &adc($c,0);
486 &dec($num) if ($i != 6);
487 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
488 &jz(&label("aw_end")) if ($i != 6);
489 }
490 &set_label("aw_end",0);
491
492# &mov("eax",$c); # $c is "eax"
493
494 &function_end($name);
495 }
496
# bn_sub_words($name): generate the multi-word subtraction routine.
# The emitted code stores a[i] - b[i] - borrow into r[i] for num words and
# leaves the final borrow in eax ($c is eax). Arguments are fetched as
# wparam(0)=r, wparam(1)=a, wparam(2)=b, wparam(3)=num.
# NOTE(review): the label names reuse the aw_* prefix from bn_add_words;
# presumably &label() scopes names per function -- confirm in x86asm.pl.
497sub bn_sub_words
498 {
499 local($name)=@_;
500
501 &function_begin($name,"");
502
503 &comment("");
504 $a="esi";
505 $b="edi";
506 $c="eax";
507 $r="ebx";
508 $tmp1="ecx";
509 $tmp2="edx";
510 $num="ebp";
511
512 &mov($r,&wparam(0)); # get r
513 &mov($a,&wparam(1)); # get a
514 &mov($b,&wparam(2)); # get b
515 &mov($num,&wparam(3)); # get num
516 &xor($c,$c); # clear borrow
517 &and($num,0xfffffff8); # num & ~7 (words handled by the unrolled loop)
518
519 &jz(&label("aw_finish"));
520
521 &set_label("aw_loop",0);
 # Per word: subtract the incoming borrow, capture the borrow-out in $c
 # (sub sets CF on borrow, mov does not modify flags, adc $c,$c yields
 # 0+0+CF), subtract b, then fold the second borrow-out into $c.
522 for ($i=0; $i<8; $i++)
523 {
524 &comment("Round $i");
525
526 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
527 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
528 &sub($tmp1,$c);
529 &mov($c,0);
530 &adc($c,$c);
531 &sub($tmp1,$tmp2);
532 &adc($c,0);
533 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
534 }
535
536 &comment("");
537 &add($a,32);
538 &add($b,32);
539 &add($r,32);
540 &sub($num,8);
541 &jnz(&label("aw_loop"));
542
543 &set_label("aw_finish",0);
544 &mov($num,&wparam(3)); # get num
545 &and($num,7);
546 &jz(&label("aw_end"));
547
 # Tail: up to 7 leftover words; every round but the last emits a dec/jz
 # pair (the mov in between leaves flags alone) to exit when the count hits
 # zero.
548 for ($i=0; $i<7; $i++)
549 {
550 &comment("Tail Round $i");
551 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
552 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
553 &sub($tmp1,$c);
554 &mov($c,0);
555 &adc($c,$c);
556 &sub($tmp1,$tmp2);
557 &adc($c,0);
558 &dec($num) if ($i != 6);
559 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
560 &jz(&label("aw_end")) if ($i != 6);
561 }
562 &set_label("aw_end",0);
563
564# &mov("eax",$c); # $c is "eax"
565
566 &function_end($name);
567 }
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl
deleted file mode 100644
index 37d79cc0c1..0000000000
--- a/src/lib/libcrypto/bn/asm/co-586.pl
+++ /dev/null
@@ -1,287 +0,0 @@
1#!/usr/local/bin/perl
# Driver: loads the perlasm helpers from x86asm.pl and generates the fully
# unrolled comba multiply and square routines for 8-word and 4-word operands.
2
# Take the directory part of $0 so the helper directories can be put on @INC.
3$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
4push(@INC,"${dir}","${dir}../../perlasm");
5require "x86asm.pl";
6
7&asm_init($ARGV[0],$0);
8
# Second argument is the operand length in words ($num in the generator subs).
9&bn_mul_comba("bn_mul_comba8",8);
10&bn_mul_comba("bn_mul_comba4",4);
11&bn_sqr_comba("bn_sqr_comba8",8);
12&bn_sqr_comba("bn_sqr_comba4",4);
13
14&asm_finish();
15
# mul_add_c: emit one multiply-accumulate step of the comba multiply.
# Multiplies eax by edx (both pre-loaded by the previous step) and adds the
# 64-bit product into the three-word accumulator $c2:$c1:$c0.
#   $a,$b       registers holding the operand pointers
#   $ai,$bi     indices of the words being multiplied (used for the comment)
#   $c0,$c1,$c2 accumulator registers, low to high
#   $pos        0: more products follow for this result word;
#               >0: last product for result word $i, so r[$i] is stored;
#               1 additionally pre-loads the operands for the next word
#   $i          index of the result word
#   $na,$nb     indices of the a/b words to pre-load for the next step
# The loads are interleaved between the add/adc instructions; mov does not
# modify the carry flag.
16sub mul_add_c
17 {
18 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
19
20 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
21 # words, and 1 if load return value
22
23 &comment("mul a[$ai]*b[$bi]");
24
25 # "eax" and "edx" will always be pre-loaded.
26 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
27 # &mov("edx",&DWP($bi*4,$b,"",0));
28
29 &mul("edx");
30 &add($c0,"eax");
31 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
32 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
33 ###
34 &adc($c1,"edx");
35 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b
36 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b
37 ###
38 &adc($c2,0);
39 # if pos > 1, this is the last step: r[] is stored but nothing is pre-loaded
40 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
41 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
42 }
43
# sqr_add_c: emit one accumulate step of the comba square in which the
# product is added once (bn_sqr_comba calls it for the $ai == $bi term).
# Multiplies eax by itself when $ai == $bi, otherwise by edx, and adds the
# 64-bit product into the accumulator $c2:$c1:$c0.
#   $r,$a       registers holding the result and operand pointers
#   $pos        0: more products follow for this result word;
#               >0: last product for result word $i, so r[$i] is stored;
#               1 additionally pre-loads operands for the next word
#   $na,$nb     indices of the words of a to pre-load into eax / edx
# mov does not modify the carry flag, so the loads may sit between add/adc.
44sub sqr_add_c
45 {
46 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
47
48 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
49 # words, and 1 if load return value
50
51 &comment("sqr a[$ai]*a[$bi]");
52
53 # "eax" and "edx" will always be pre-loaded.
54 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
55 # &mov("edx",&DWP($bi*4,$b,"",0));
56
57 if ($ai == $bi)
58 { &mul("eax");}
59 else
60 { &mul("edx");}
61 &add($c0,"eax");
62 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
63 ###
64 &adc($c1,"edx");
65 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
66 ###
67 &adc($c2,0);
68 # if pos > 1, this is the last step: r[] is stored but nothing is pre-loaded
69 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
70 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
71 }
72
# sqr_add_c2: emit one accumulate step of the comba square in which the
# product is added twice (bn_sqr_comba calls it for the $ai != $bi terms).
# The 64-bit product edx:eax is doubled in place (add eax,eax / adc edx,edx);
# the bit shifted out of edx goes into $c2, then the doubled product is added
# into $c1:$c0 with the resulting carry again folded into $c2.
#   $r,$a       registers holding the result and operand pointers
#   $pos        0: more products follow for this result word;
#               >0: last product for result word $i, so r[$i] is stored
#   $na,$nb     indices of the words of a to pre-load into eax / edx
# mov does not modify the carry flag, so the loads may sit between adc's.
73sub sqr_add_c2
74 {
75 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
76
77 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
78 # words, and 1 if load return value
79
80 &comment("sqr a[$ai]*a[$bi]");
81
82 # "eax" and "edx" will always be pre-loaded.
83 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
84 # &mov("edx",&DWP($bi*4,$a,"",0));
85
86 if ($ai == $bi)
87 { &mul("eax");}
88 else
89 { &mul("edx");}
90 &add("eax","eax");
91 ###
92 &adc("edx","edx");
93 ###
94 &adc($c2,0);
95 &add($c0,"eax");
96 &adc($c1,"edx");
97 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
98 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
99 &adc($c2,0);
100 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
101 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
102 ###
103 }
104
# bn_mul_comba($name,$num): generate a fully unrolled $num x $num word
# multiply. The emitted code reads a from wparam(1) and b from wparam(2) and
# writes 2*$num result words through the pointer in wparam(0).
# Result words are produced one at a time, lowest first: for word $i every
# product a[$ai]*b[$bi] with $ai+$bi == $i is accumulated via mul_add_c into
# a three-register accumulator, whose roles rotate after each stored word.
105sub bn_mul_comba
106 {
107 local($name,$num)=@_;
108 local($a,$b,$c0,$c1,$c2);
109 local($i,$as,$ae,$bs,$be,$ai,$bi);
110 local($tot,$end);
111
112 &function_begin_B($name,"");
113
114 $c0="ebx";
115 $c1="ecx";
116 $c2="ebp";
117 $a="esi";
118 $b="edi";
119
 # $as/$bs: starting a/b index for the current result word; $be: last b
 # index. $tot: number of result words produced inside the loop.
120 $as=0;
121 $ae=0;
122 $bs=0;
123 $be=0;
124 $tot=$num+$num-1;
125
126 &push("esi");
127 &mov($a,&wparam(1));
128 &push("edi");
129 &mov($b,&wparam(2));
130 &push("ebp");
131 &push("ebx");
132
133 &xor($c0,$c0);
134 &mov("eax",&DWP(0,$a,"",0)); # load the first word of a
135 &xor($c1,$c1);
136 &mov("edx",&DWP(0,$b,"",0)); # load the first word of b
137
138 for ($i=0; $i<$tot; $i++)
139 {
140 $ai=$as;
141 $bi=$bs;
142 $end=$be+1;
143
144 &comment("################## Calculate word $i");
145
146 for ($j=$bs; $j<$end; $j++)
147 {
148 &xor($c2,$c2) if ($j == $bs);
 # $v is mul_add_c's $pos: 0 = more products for this word,
 # 1 = last product of this word, 2 = last product overall.
149 if (($j+1) == $end)
150 {
151 $v=1;
152 $v=2 if (($i+1) == $tot);
153 }
154 else
155 { $v=0; }
 # Indices to pre-load: the next pair on this diagonal, or the first
 # pair of the next result word.
156 if (($j+1) != $end)
157 {
158 $na=($ai-1);
159 $nb=($bi+1);
160 }
161 else
162 {
163 $na=$as+($i < ($num-1));
164 $nb=$bs+($i >= ($num-1));
165 }
166#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
167 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
168 if ($v)
169 {
170 &comment("saved r[$i]");
171 # &mov("eax",&wparam(0));
172 # &mov(&DWP($i*4,"eax","",0),$c0);
 # Rotate the accumulator: the old middle word becomes the new low
 # word; the stored register is reused as the high word.
173 ($c0,$c1,$c2)=($c1,$c2,$c0);
174 }
175 $ai--;
176 $bi++;
177 }
178 $as++ if ($i < ($num-1));
179 $ae++ if ($i >= ($num-1));
180
181 $bs++ if ($i >= ($num-1));
182 $be++ if ($i < ($num-1));
183 }
 # Here $i == $tot, so this stores the top result word. eax still holds the
 # r pointer: the final mul_add_c call ($pos 2) loaded it and did not reload
 # a word of a afterwards.
184 &comment("save r[$i]");
185 # &mov("eax",&wparam(0));
186 &mov(&DWP($i*4,"eax","",0),$c0);
187
188 &pop("ebx");
189 &pop("ebp");
190 &pop("edi");
191 &pop("esi");
192 &ret();
193 &function_end_B($name);
194 }
195
# bn_sqr_comba($name,$num): generate a fully unrolled $num-word square.
# The emitted code reads a from wparam(1) and writes 2*$num result words
# through the pointer in wparam(0).
# Result words are produced lowest first. For word $i only the pairs with
# $ai >= $bi are visited: off-diagonal products go through sqr_add_c2 (which
# adds them twice) and the $ai == $bi term through sqr_add_c.
196sub bn_sqr_comba
197 {
198 local($name,$num)=@_;
 # NOTE(review): the "=@_" below copies $name/$num into $r/$a; both are
 # overwritten with register names before use, so it looks unintended but
 # harmless -- confirm before removing.
199 local($r,$a,$c0,$c1,$c2)=@_;
200 local($i,$as,$ae,$bs,$be,$ai,$bi);
201 local($b,$tot,$end,$half);
202
203 &function_begin_B($name,"");
204
205 $c0="ebx";
206 $c1="ecx";
207 $c2="ebp";
208 $a="esi";
209 $r="edi";
210
211 &push("esi");
212 &push("edi");
213 &push("ebp");
214 &push("ebx");
215 &mov($r,&wparam(0));
216 &mov($a,&wparam(1));
217 &xor($c0,$c0);
218 &xor($c1,$c1);
219 &mov("eax",&DWP(0,$a,"",0)); # load the first word
220
 # $as/$bs: starting indices of the pair for the current result word;
 # $tot: number of result words produced inside the loop.
221 $as=0;
222 $ae=0;
223 $bs=0;
224 $be=0;
225 $tot=$num+$num-1;
226
227 for ($i=0; $i<$tot; $i++)
228 {
229 $ai=$as;
230 $bi=$bs;
231 $end=$be+1;
232
233 &comment("############### Calculate word $i");
234 for ($j=$bs; $j<$end; $j++)
235 {
236 &xor($c2,$c2) if ($j == $bs);
 # $v is the helpers' $pos. It becomes non-zero once the next pair
 # would have $ai < $bi, i.e. this is the last product needed for
 # word $i; 2 marks the last product overall.
237 if (($ai-1) < ($bi+1))
238 {
239 $v=1;
240 $v=2 if ($i+1) == $tot;
241 }
242 else
243 { $v=0; }
 # Indices to pre-load: the next pair on this diagonal, or the first
 # pair of the next result word.
244 if (!$v)
245 {
246 $na=$ai-1;
247 $nb=$bi+1;
248 }
249 else
250 {
251 $na=$as+($i < ($num-1));
252 $nb=$bs+($i >= ($num-1));
253 }
254 if ($ai == $bi)
255 {
256 &sqr_add_c($r,$a,$ai,$bi,
257 $c0,$c1,$c2,$v,$i,$na,$nb);
258 }
259 else
260 {
261 &sqr_add_c2($r,$a,$ai,$bi,
262 $c0,$c1,$c2,$v,$i,$na,$nb);
263 }
264 if ($v)
265 {
266 &comment("saved r[$i]");
267 #&mov(&DWP($i*4,$r,"",0),$c0);
 # Rotate the accumulator and skip the mirrored half of the diagonal.
268 ($c0,$c1,$c2)=($c1,$c2,$c0);
269 last;
270 }
271 $ai--;
272 $bi++;
273 }
274 $as++ if ($i < ($num-1));
275 $ae++ if ($i >= ($num-1));
276
277 $bs++ if ($i >= ($num-1));
278 $be++ if ($i < ($num-1));
279 }
 # Here $i == $tot, so this stores the top result word.
280 &mov(&DWP($i*4,$r,"",0),$c0);
281 &pop("ebx");
282 &pop("ebp");
283 &pop("edi");
284 &pop("esi");
285 &ret();
286 &function_end_B($name);
287 }
diff --git a/src/lib/libcrypto/bn/asm/mips-mont.pl b/src/lib/libcrypto/bn/asm/mips-mont.pl
deleted file mode 100644
index caae04ed3a..0000000000
--- a/src/lib/libcrypto/bn/asm/mips-mont.pl
+++ /dev/null
@@ -1,426 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# This module doesn't present direct interest for OpenSSL, because it
11# doesn't provide better performance for longer keys, at least not on
12# in-order-execution cores. While 512-bit RSA sign operations can be
13# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and
14# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from
15# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA
16# verify:-( All comparisons are against bn_mul_mont-free assembler.
17# The module might be of interest to embedded system developers, as
18# the code is smaller than 1KB, yet offers >3x improvement on MIPS64
19# and 75-30% [less for longer keys] on MIPS32 over compiler-generated
20# code.
21
22######################################################################
23# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
24# widely used. Then there is a new contender: NUBI. It appears that if
25# one picks the latter, it's possible to arrange code in ABI neutral
26# manner. Therefore let's stick to NUBI register layout:
27#
28($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
29($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
30($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
31($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
32#
33# The return value is placed in $a0. Following coding rules facilitate
34# interoperability:
35#
36# - never ever touch $tp, "thread pointer", former $gp;
37# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
38# old code];
39# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
40#
41# For reference here is register layout for N32/64 MIPS ABIs:
42#
43# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
44# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
45# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
46# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
47# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
48#
49$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
50
# Pick the pointer add/sub mnemonics, the register store/load mnemonics and
# the register size in bytes according to the flavour string.
51if ($flavour =~ /64|n32/i) {
52 $PTR_ADD="dadd"; # incidentally works even on n32
53 $PTR_SUB="dsub"; # incidentally works even on n32
54 $REG_S="sd";
55 $REG_L="ld";
56 $SZREG=8;
57} else {
58 $PTR_ADD="add";
59 $PTR_SUB="sub";
60 $REG_S="sw";
61 $REG_L="lw";
62 $SZREG=4;
63}
# Value OR-ed into the .mask directive of bn_mul_mont_internal: bits 12-23
# for nubi flavours, bits 16-23 otherwise.
64$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
65#
66# <appro@openssl.org>
67#
68######################################################################
69
# Consume command-line arguments until one looks like a file name of the form
# "name.ext", then redirect STDOUT to that file.
70while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
71open STDOUT,">$output";
72
# Mnemonics used for BN word load/store/multiply/add/subtract and the BN word
# size in bytes, chosen by the same flavour test as the register-size setup.
73if ($flavour =~ /64|n32/i) {
74 $LD="ld";
75 $ST="sd";
76 $MULTU="dmultu";
77 $ADDU="daddu";
78 $SUBU="dsubu";
79 $BNSZ=8;
80} else {
81 $LD="lw";
82 $ST="sw";
83 $MULTU="multu";
84 $ADDU="addu";
85 $SUBU="subu";
86 $BNSZ=4;
87}
88
# Argument registers, annotated with the C prototype they correspond to.
89# int bn_mul_mont(
90$rp=$a0; # BN_ULONG *rp,
91$ap=$a1; # const BN_ULONG *ap,
92$bp=$a2; # const BN_ULONG *bp,
93$np=$a3; # const BN_ULONG *np,
94$n0=$a4; # const BN_ULONG *n0,
95$num=$a5; # int num);
96
# Working registers referenced by name in the assembly template that follows.
97$lo0=$a6;
98$hi0=$a7;
99$lo1=$t1;
100$hi1=$t2;
101$aj=$s0;
102$bi=$s1;
103$nj=$s2;
104$tp=$s3;
105$alo=$s4;
106$ahi=$s5;
107$nlo=$s6;
108$nhi=$s7;
109$tj=$s8;
110$i=$s9;
111$j=$s10;
112$m1=$s11;
113
# Stack frame size counted in $SZREG-sized slots (the template uses
# $FRAMESIZE*$SZREG as the byte size).
114$FRAMESIZE=14;
115
116$code=<<___;
117.text
118
119.set noat
120.set noreorder
121
122.align 5
123.globl bn_mul_mont
124.ent bn_mul_mont
125bn_mul_mont:
126___
127$code.=<<___ if ($flavour =~ /o32/i);
128 lw $n0,16($sp)
129 lw $num,20($sp)
130___
131$code.=<<___;
132 slt $at,$num,4
133 bnez $at,1f
134 li $t0,0
135 slt $at,$num,17 # on in-order CPU
136 bnez $at,bn_mul_mont_internal
137 nop
1381: jr $ra
139 li $a0,0
140.end bn_mul_mont
141
142.align 5
143.ent bn_mul_mont_internal
144bn_mul_mont_internal:
145 .frame $fp,$FRAMESIZE*$SZREG,$ra
146 .mask 0x40000000|$SAVED_REGS_MASK,-$SZREG
147 $PTR_SUB $sp,$FRAMESIZE*$SZREG
148 $REG_S $fp,($FRAMESIZE-1)*$SZREG($sp)
149 $REG_S $s11,($FRAMESIZE-2)*$SZREG($sp)
150 $REG_S $s10,($FRAMESIZE-3)*$SZREG($sp)
151 $REG_S $s9,($FRAMESIZE-4)*$SZREG($sp)
152 $REG_S $s8,($FRAMESIZE-5)*$SZREG($sp)
153 $REG_S $s7,($FRAMESIZE-6)*$SZREG($sp)
154 $REG_S $s6,($FRAMESIZE-7)*$SZREG($sp)
155 $REG_S $s5,($FRAMESIZE-8)*$SZREG($sp)
156 $REG_S $s4,($FRAMESIZE-9)*$SZREG($sp)
157___
158$code.=<<___ if ($flavour =~ /nubi/i);
159 $REG_S $s3,($FRAMESIZE-10)*$SZREG($sp)
160 $REG_S $s2,($FRAMESIZE-11)*$SZREG($sp)
161 $REG_S $s1,($FRAMESIZE-12)*$SZREG($sp)
162 $REG_S $s0,($FRAMESIZE-13)*$SZREG($sp)
163___
164$code.=<<___;
165 move $fp,$sp
166
167 .set reorder
168 $LD $n0,0($n0)
169 $LD $bi,0($bp) # bp[0]
170 $LD $aj,0($ap) # ap[0]
171 $LD $nj,0($np) # np[0]
172
173 $PTR_SUB $sp,2*$BNSZ # place for two extra words
174 sll $num,`log($BNSZ)/log(2)`
175 li $at,-4096
176 $PTR_SUB $sp,$num
177 and $sp,$at
178
179 $MULTU $aj,$bi
180 $LD $alo,$BNSZ($ap)
181 $LD $nlo,$BNSZ($np)
182 mflo $lo0
183 mfhi $hi0
184 $MULTU $lo0,$n0
185 mflo $m1
186
187 $MULTU $alo,$bi
188 mflo $alo
189 mfhi $ahi
190
191 $MULTU $nj,$m1
192 mflo $lo1
193 mfhi $hi1
194 $MULTU $nlo,$m1
195 $ADDU $lo1,$lo0
196 sltu $at,$lo1,$lo0
197 $ADDU $hi1,$at
198 mflo $nlo
199 mfhi $nhi
200
201 move $tp,$sp
202 li $j,2*$BNSZ
203.align 4
204.L1st:
205 .set noreorder
206 $PTR_ADD $aj,$ap,$j
207 $PTR_ADD $nj,$np,$j
208 $LD $aj,($aj)
209 $LD $nj,($nj)
210
211 $MULTU $aj,$bi
212 $ADDU $lo0,$alo,$hi0
213 $ADDU $lo1,$nlo,$hi1
214 sltu $at,$lo0,$hi0
215 sltu $t0,$lo1,$hi1
216 $ADDU $hi0,$ahi,$at
217 $ADDU $hi1,$nhi,$t0
218 mflo $alo
219 mfhi $ahi
220
221 $ADDU $lo1,$lo0
222 sltu $at,$lo1,$lo0
223 $MULTU $nj,$m1
224 $ADDU $hi1,$at
225 addu $j,$BNSZ
226 $ST $lo1,($tp)
227 sltu $t0,$j,$num
228 mflo $nlo
229 mfhi $nhi
230
231 bnez $t0,.L1st
232 $PTR_ADD $tp,$BNSZ
233 .set reorder
234
235 $ADDU $lo0,$alo,$hi0
236 sltu $at,$lo0,$hi0
237 $ADDU $hi0,$ahi,$at
238
239 $ADDU $lo1,$nlo,$hi1
240 sltu $t0,$lo1,$hi1
241 $ADDU $hi1,$nhi,$t0
242 $ADDU $lo1,$lo0
243 sltu $at,$lo1,$lo0
244 $ADDU $hi1,$at
245
246 $ST $lo1,($tp)
247
248 $ADDU $hi1,$hi0
249 sltu $at,$hi1,$hi0
250 $ST $hi1,$BNSZ($tp)
251 $ST $at,2*$BNSZ($tp)
252
253 li $i,$BNSZ
254.align 4
255.Louter:
256 $PTR_ADD $bi,$bp,$i
257 $LD $bi,($bi)
258 $LD $aj,($ap)
259 $LD $alo,$BNSZ($ap)
260 $LD $tj,($sp)
261
262 $MULTU $aj,$bi
263 $LD $nj,($np)
264 $LD $nlo,$BNSZ($np)
265 mflo $lo0
266 mfhi $hi0
267 $ADDU $lo0,$tj
268 $MULTU $lo0,$n0
269 sltu $at,$lo0,$tj
270 $ADDU $hi0,$at
271 mflo $m1
272
273 $MULTU $alo,$bi
274 mflo $alo
275 mfhi $ahi
276
277 $MULTU $nj,$m1
278 mflo $lo1
279 mfhi $hi1
280
281 $MULTU $nlo,$m1
282 $ADDU $lo1,$lo0
283 sltu $at,$lo1,$lo0
284 $ADDU $hi1,$at
285 mflo $nlo
286 mfhi $nhi
287
288 move $tp,$sp
289 li $j,2*$BNSZ
290 $LD $tj,$BNSZ($tp)
291.align 4
292.Linner:
293 .set noreorder
294 $PTR_ADD $aj,$ap,$j
295 $PTR_ADD $nj,$np,$j
296 $LD $aj,($aj)
297 $LD $nj,($nj)
298
299 $MULTU $aj,$bi
300 $ADDU $lo0,$alo,$hi0
301 $ADDU $lo1,$nlo,$hi1
302 sltu $at,$lo0,$hi0
303 sltu $t0,$lo1,$hi1
304 $ADDU $hi0,$ahi,$at
305 $ADDU $hi1,$nhi,$t0
306 mflo $alo
307 mfhi $ahi
308
309 $ADDU $lo0,$tj
310 addu $j,$BNSZ
311 $MULTU $nj,$m1
312 sltu $at,$lo0,$tj
313 $ADDU $lo1,$lo0
314 $ADDU $hi0,$at
315 sltu $t0,$lo1,$lo0
316 $LD $tj,2*$BNSZ($tp)
317 $ADDU $hi1,$t0
318 sltu $at,$j,$num
319 mflo $nlo
320 mfhi $nhi
321 $ST $lo1,($tp)
322 bnez $at,.Linner
323 $PTR_ADD $tp,$BNSZ
324 .set reorder
325
326 $ADDU $lo0,$alo,$hi0
327 sltu $at,$lo0,$hi0
328 $ADDU $hi0,$ahi,$at
329 $ADDU $lo0,$tj
330 sltu $t0,$lo0,$tj
331 $ADDU $hi0,$t0
332
333 $LD $tj,2*$BNSZ($tp)
334 $ADDU $lo1,$nlo,$hi1
335 sltu $at,$lo1,$hi1
336 $ADDU $hi1,$nhi,$at
337 $ADDU $lo1,$lo0
338 sltu $t0,$lo1,$lo0
339 $ADDU $hi1,$t0
340 $ST $lo1,($tp)
341
342 $ADDU $lo1,$hi1,$hi0
343 sltu $hi1,$lo1,$hi0
344 $ADDU $lo1,$tj
345 sltu $at,$lo1,$tj
346 $ADDU $hi1,$at
347 $ST $lo1,$BNSZ($tp)
348 $ST $hi1,2*$BNSZ($tp)
349
350 addu $i,$BNSZ
351 sltu $t0,$i,$num
352 bnez $t0,.Louter
353
354 .set noreorder
355 $PTR_ADD $tj,$sp,$num # &tp[num]
356 move $tp,$sp
357 move $ap,$sp
358 li $hi0,0 # clear borrow bit
359
360.align 4
361.Lsub: $LD $lo0,($tp)
362 $LD $lo1,($np)
363 $PTR_ADD $tp,$BNSZ
364 $PTR_ADD $np,$BNSZ
365 $SUBU $lo1,$lo0,$lo1 # tp[i]-np[i]
366 sgtu $at,$lo1,$lo0
367 $SUBU $lo0,$lo1,$hi0
368 sgtu $hi0,$lo0,$lo1
369 $ST $lo0,($rp)
370 or $hi0,$at
371 sltu $at,$tp,$tj
372 bnez $at,.Lsub
373 $PTR_ADD $rp,$BNSZ
374
375 $SUBU $hi0,$hi1,$hi0 # handle upmost overflow bit
376 move $tp,$sp
377 $PTR_SUB $rp,$num # restore rp
378 not $hi1,$hi0
379
380 and $ap,$hi0,$sp
381 and $bp,$hi1,$rp
382 or $ap,$ap,$bp # ap=borrow?tp:rp
383
384.align 4
385.Lcopy: $LD $aj,($ap)
386 $PTR_ADD $ap,$BNSZ
387 $ST $zero,($tp)
388 $PTR_ADD $tp,$BNSZ
389 sltu $at,$tp,$tj
390 $ST $aj,($rp)
391 bnez $at,.Lcopy
392 $PTR_ADD $rp,$BNSZ
393
394 li $a0,1
395 li $t0,1
396
397 .set noreorder
398 move $sp,$fp
399 $REG_L $fp,($FRAMESIZE-1)*$SZREG($sp)
400 $REG_L $s11,($FRAMESIZE-2)*$SZREG($sp)
401 $REG_L $s10,($FRAMESIZE-3)*$SZREG($sp)
402 $REG_L $s9,($FRAMESIZE-4)*$SZREG($sp)
403 $REG_L $s8,($FRAMESIZE-5)*$SZREG($sp)
404 $REG_L $s7,($FRAMESIZE-6)*$SZREG($sp)
405 $REG_L $s6,($FRAMESIZE-7)*$SZREG($sp)
406 $REG_L $s5,($FRAMESIZE-8)*$SZREG($sp)
407 $REG_L $s4,($FRAMESIZE-9)*$SZREG($sp)
408___
409$code.=<<___ if ($flavour =~ /nubi/i);
410 $REG_L $s3,($FRAMESIZE-10)*$SZREG($sp)
411 $REG_L $s2,($FRAMESIZE-11)*$SZREG($sp)
412 $REG_L $s1,($FRAMESIZE-12)*$SZREG($sp)
413 $REG_L $s0,($FRAMESIZE-13)*$SZREG($sp)
414___
415$code.=<<___;
416 jr $ra
417 $PTR_ADD $sp,$FRAMESIZE*$SZREG
418.end bn_mul_mont_internal
419.rdata
420.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
421___
422
# Replace every `...` span in the generated text with the result of
# evaluating its contents as Perl (e.g. the log($BNSZ)/log(2) shift count).
423$code =~ s/\`([^\`]*)\`/eval $1/gem;
424
# Emit the finished assembly text and flush the output stream.
425print $code;
426close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl
deleted file mode 100644
index 02d43e15b0..0000000000
--- a/src/lib/libcrypto/bn/asm/mips.pl
+++ /dev/null
@@ -1,2234 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project.
6#
7# Rights for redistribution and usage in source and binary forms are
8# granted according to the OpenSSL license. Warranty of any kind is
9# disclaimed.
10# ====================================================================
11
12
13# July 1999
14#
15# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c.
16#
17# The module is designed to work with either of the "new" MIPS ABI(5),
18# namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
19# IRIX 5.x not only because it doesn't support new ABIs but also
20# because 5.x kernels put R4x00 CPU into 32-bit mode and all those
21# 64-bit instructions (daddu, dmultu, etc.) found below will only
22# cause an illegal instruction exception:-(
23#
24# In addition the code depends on preprocessor flags set up by MIPSpro
25# compiler driver (either as or cc) and therefore (probably?) can't be
26# compiled by the GNU assembler. GNU C driver manages fine though...
27# I mean as long as -mmips-as is specified or is the default option,
28# because then it simply invokes /usr/bin/as which in turn takes
29# perfect care of the preprocessor definitions. Another neat feature
30# offered by the MIPSpro assembler is an optimization pass. This gave
31# me the opportunity to have the code looking more regular as all those
32# architecture dependent instruction rescheduling details were left to
33# the assembler. Cool, huh?
34#
35# Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
36# goes way over 3 times faster!
37#
38# <appro@fy.chalmers.se>
39
40# October 2010
41#
42# Adapt the module even for 32-bit ABIs and other OSes. The former was
43# achieved by mechanical replacement of 64-bit arithmetic instructions
44# such as dmultu, daddu, etc. with their 32-bit counterparts and
45# adjusting offsets denoting multiples of BN_ULONG. Above mentioned
46# >3x performance improvement naturally does not apply to 32-bit code
47# [because there is no instruction 32-bit compiler can't use], one
48# has to be content with 40-85% improvement depending on benchmark and
49# key length, more for longer keys.
50
# Command line handling: the first argument is the flavour string that
# selects the instruction set / ABI variant below; a later argument that
# looks like a file name becomes the output file.
51$flavour = shift;
52while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} # skip arguments until one matches "name.ext" or the list is exhausted
53open STDOUT,">$output"; # redirect STDOUT to that file; the result of open is not checked here
54
# Pick instruction mnemonics and sizes according to the flavour string.
# Flavours matching "64" or "n32" get the doubleword (d-prefixed) forms
# with 8-byte BN words and 8-byte register save slots; everything else
# gets the 32-bit forms with 4-byte sizes.
55if ($flavour =~ /64|n32/i) {
56 $LD="ld";
57 $ST="sd";
58 $MULTU="dmultu";
59 $DIVU="ddivu";
60 $ADDU="daddu";
61 $SUBU="dsubu";
62 $SRL="dsrl";
63 $SLL="dsll";
64 $BNSZ=8; # size of one BN word in bytes
65 $PTR_ADD="daddu";
66 $PTR_SUB="dsubu";
67 $SZREG=8; # size of one register save slot in bytes
68 $REG_S="sd";
69 $REG_L="ld";
70} else {
71 $LD="lw";
72 $ST="sw";
73 $MULTU="multu";
74 $DIVU="divu";
75 $ADDU="addu";
76 $SUBU="subu";
77 $SRL="srl";
78 $SLL="sll";
79 $BNSZ=4; # size of one BN word in bytes
80 $PTR_ADD="addu";
81 $PTR_SUB="subu";
82 $SZREG=4; # size of one register save slot in bytes
83 $REG_S="sw";
84 $REG_L="lw";
85 $code=".set mips2\n"; # only this branch starts the output with an ISA directive
86}
87
88# Below is N32/64 register layout used in the original module.
89# Each Perl variable holds the numeric register name ("\$0".."\$31")
89# that is interpolated into the assembly text.
90($zero,$at,$v0,$v1)=map("\$$_",(0..3));
91($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
92($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
93($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
94($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
95($ta0,$ta1,$ta2,$ta3)=($a4,$a5,$a6,$a7); # $ta0..$ta3 are just other names for registers 8..11
96#
97# No special adaptation is required for O32. NUBI on the other hand
98# is treated by saving/restoring ($v1,$t0..$t3).
99
100$gp=$v1 if ($flavour =~ /nubi/i); # under NUBI the "$gp" slot in the prologues/epilogues therefore saves register 3 ($v1)
101
102$minus4=$v1; # register 3 also holds the constant -4 used to mask counts down to a multiple of 4
103
# Module preamble (identification strings, .text, .set noat) followed by
# bn_mul_add_words. Register usage as visible in the code: $a0 points at
# the words that are read, updated and written back, $a1 points at the
# source words, $a2 is the word count and $a3 is the multiplier word. The
# running carry lives in $v0 and is returned in both $v0 and $a0.
104$code.=<<___;
105.rdata
106.asciiz "mips3.s, Version 1.2"
107.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>"
108
109.text
110.set noat
111
112.align 5
113.globl bn_mul_add_words
114.ent bn_mul_add_words
115bn_mul_add_words:
116 .set noreorder
117 bgtz $a2,bn_mul_add_words_internal # count > 0: go do the work
118 move $v0,$zero # delay slot: carry starts at 0
119 jr $ra # count <= 0: return 0
120 move $a0,$v0 # delay slot: mirror the result in $a0
121.end bn_mul_add_words
122
123.align 5
124.ent bn_mul_add_words_internal
125bn_mul_add_words_internal:
126___
# NUBI only: save $ra, $t0..$t3 and the register named by $gp on a
# six-slot frame.
127$code.=<<___ if ($flavour =~ /nubi/i);
128 .frame $sp,6*$SZREG,$ra
129 .mask 0x8000f008,-$SZREG
130 .set noreorder
131 $PTR_SUB $sp,6*$SZREG
132 $REG_S $ra,5*$SZREG($sp)
133 $REG_S $t3,4*$SZREG($sp)
134 $REG_S $t2,3*$SZREG($sp)
135 $REG_S $t1,2*$SZREG($sp)
136 $REG_S $t0,1*$SZREG($sp)
137 $REG_S $gp,0*$SZREG($sp)
138___
139$code.=<<___;
140 .set reorder
141 li $minus4,-4
142 and $ta0,$a2,$minus4 # count with its two low bits cleared
143 beqz $ta0,.L_bn_mul_add_words_tail # fewer than 4 words: only the tail runs
144
145.L_bn_mul_add_words_loop: # four words per iteration
146 $LD $t0,0($a1) # source word 0
147 $MULTU $t0,$a3 # start source word 0 times multiplier
148 $LD $t1,0($a0) # destination word 0
149 $LD $t2,$BNSZ($a1)
150 $LD $t3,$BNSZ($a0)
151 $LD $ta0,2*$BNSZ($a1)
152 $LD $ta1,2*$BNSZ($a0)
153 $ADDU $t1,$v0 # add incoming carry
154 sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit
155 # values", but it seems to work fine
156 # even on 64-bit registers.
157 mflo $at # low half of the product
158 mfhi $t0 # high half of the product
159 $ADDU $t1,$at # add low half
160 $ADDU $v0,$t0 # carry += high half
161 $MULTU $t2,$a3 # start the next product early
162 sltu $at,$t1,$at # carry out of the low-half add
163 $ST $t1,0($a0)
164 $ADDU $v0,$at
165
166 $LD $ta2,3*$BNSZ($a1) # word 1: same steps as above
167 $LD $ta3,3*$BNSZ($a0)
168 $ADDU $t3,$v0
169 sltu $v0,$t3,$v0
170 mflo $at
171 mfhi $t2
172 $ADDU $t3,$at
173 $ADDU $v0,$t2
174 $MULTU $ta0,$a3
175 sltu $at,$t3,$at
176 $ST $t3,$BNSZ($a0)
177 $ADDU $v0,$at
178
179 subu $a2,4 # four words consumed
180 $PTR_ADD $a0,4*$BNSZ # pointers advance now, so the remaining
181 $PTR_ADD $a1,4*$BNSZ # stores use negative offsets
182 $ADDU $ta1,$v0 # word 2
183 sltu $v0,$ta1,$v0
184 mflo $at
185 mfhi $ta0
186 $ADDU $ta1,$at
187 $ADDU $v0,$ta0
188 $MULTU $ta2,$a3
189 sltu $at,$ta1,$at
190 $ST $ta1,-2*$BNSZ($a0)
191 $ADDU $v0,$at
192
193
194 and $ta0,$a2,$minus4 # non-zero while at least 4 words remain
195 $ADDU $ta3,$v0 # word 3
196 sltu $v0,$ta3,$v0
197 mflo $at
198 mfhi $ta2
199 $ADDU $ta3,$at
200 $ADDU $v0,$ta2
201 sltu $at,$ta3,$at
202 $ST $ta3,-$BNSZ($a0)
203 .set noreorder
204 bgtz $ta0,.L_bn_mul_add_words_loop
205 $ADDU $v0,$at # delay slot: finish the carry for word 3
206
207 beqz $a2,.L_bn_mul_add_words_return # nothing left over
208 nop
209
210.L_bn_mul_add_words_tail: # handles the remaining 1..3 words
211 .set reorder
212 $LD $t0,0($a1)
213 $MULTU $t0,$a3
214 $LD $t1,0($a0)
215 subu $a2,1
216 $ADDU $t1,$v0
217 sltu $v0,$t1,$v0
218 mflo $at
219 mfhi $t0
220 $ADDU $t1,$at
221 $ADDU $v0,$t0
222 sltu $at,$t1,$at
223 $ST $t1,0($a0)
224 $ADDU $v0,$at
225 beqz $a2,.L_bn_mul_add_words_return
226
227 $LD $t0,$BNSZ($a1)
228 $MULTU $t0,$a3
229 $LD $t1,$BNSZ($a0)
230 subu $a2,1
231 $ADDU $t1,$v0
232 sltu $v0,$t1,$v0
233 mflo $at
234 mfhi $t0
235 $ADDU $t1,$at
236 $ADDU $v0,$t0
237 sltu $at,$t1,$at
238 $ST $t1,$BNSZ($a0)
239 $ADDU $v0,$at
240 beqz $a2,.L_bn_mul_add_words_return
241
242 $LD $t0,2*$BNSZ($a1) # third and last possible tail word
243 $MULTU $t0,$a3
244 $LD $t1,2*$BNSZ($a0)
245 $ADDU $t1,$v0
246 sltu $v0,$t1,$v0
247 mflo $at
248 mfhi $t0
249 $ADDU $t1,$at
250 $ADDU $v0,$t0
251 sltu $at,$t1,$at
252 $ST $t1,2*$BNSZ($a0)
253 $ADDU $v0,$at
254
255.L_bn_mul_add_words_return:
256 .set noreorder
257___
# NUBI only: reload the saved temporaries and release the frame.
258$code.=<<___ if ($flavour =~ /nubi/i);
259 $REG_L $t3,4*$SZREG($sp)
260 $REG_L $t2,3*$SZREG($sp)
261 $REG_L $t1,2*$SZREG($sp)
262 $REG_L $t0,1*$SZREG($sp)
263 $REG_L $gp,0*$SZREG($sp)
264 $PTR_ADD $sp,6*$SZREG
265___
266$code.=<<___;
267 jr $ra
268 move $a0,$v0 # delay slot: final carry is also placed in $a0
269.end bn_mul_add_words_internal
270
271.align 5
 # bn_mul_words: for each of $a2 words, multiply the source word at $a1
 # by $a3, add the running carry and store the low part at $a0. The
 # carry is kept in $v0 and returned in both $v0 and $a0. The public
 # stub returns 0 when the count is not positive.
272.globl bn_mul_words
273.ent bn_mul_words
274bn_mul_words:
275 .set noreorder
276 bgtz $a2,bn_mul_words_internal # count > 0: go do the work
277 move $v0,$zero # delay slot: carry starts at 0
278 jr $ra
279 move $a0,$v0
280.end bn_mul_words
281
282.align 5
283.ent bn_mul_words_internal
284bn_mul_words_internal:
285___
# NUBI only: save $ra, $t0..$t3 and the register named by $gp.
286$code.=<<___ if ($flavour =~ /nubi/i);
287 .frame $sp,6*$SZREG,$ra
288 .mask 0x8000f008,-$SZREG
289 .set noreorder
290 $PTR_SUB $sp,6*$SZREG
291 $REG_S $ra,5*$SZREG($sp)
292 $REG_S $t3,4*$SZREG($sp)
293 $REG_S $t2,3*$SZREG($sp)
294 $REG_S $t1,2*$SZREG($sp)
295 $REG_S $t0,1*$SZREG($sp)
296 $REG_S $gp,0*$SZREG($sp)
297___
298$code.=<<___;
299 .set reorder
300 li $minus4,-4
301 and $ta0,$a2,$minus4 # count with its two low bits cleared
302 beqz $ta0,.L_bn_mul_words_tail # fewer than 4 words: only the tail runs
303
304.L_bn_mul_words_loop: # four words per iteration
305 $LD $t0,0($a1)
306 $MULTU $t0,$a3
307 $LD $t2,$BNSZ($a1)
308 $LD $ta0,2*$BNSZ($a1)
309 $LD $ta2,3*$BNSZ($a1)
310 mflo $at # low half of the product
311 mfhi $t0 # high half of the product
312 $ADDU $v0,$at # carry + low half = result word
313 sltu $t1,$v0,$at # carry out of that add
314 $MULTU $t2,$a3 # start the next product early
315 $ST $v0,0($a0)
316 $ADDU $v0,$t1,$t0 # new carry = carry-out + high half
317
318 subu $a2,4
319 $PTR_ADD $a0,4*$BNSZ # pointers advance now, so the remaining
320 $PTR_ADD $a1,4*$BNSZ # stores use negative offsets
321 mflo $at
322 mfhi $t2
323 $ADDU $v0,$at
324 sltu $t3,$v0,$at
325 $MULTU $ta0,$a3
326 $ST $v0,-3*$BNSZ($a0)
327 $ADDU $v0,$t3,$t2
328
329 mflo $at
330 mfhi $ta0
331 $ADDU $v0,$at
332 sltu $ta1,$v0,$at
333 $MULTU $ta2,$a3
334 $ST $v0,-2*$BNSZ($a0)
335 $ADDU $v0,$ta1,$ta0
336
337 and $ta0,$a2,$minus4 # non-zero while at least 4 words remain
338 mflo $at
339 mfhi $ta2
340 $ADDU $v0,$at
341 sltu $ta3,$v0,$at
342 $ST $v0,-$BNSZ($a0)
343 .set noreorder
344 bgtz $ta0,.L_bn_mul_words_loop
345 $ADDU $v0,$ta3,$ta2 # delay slot: carry for the fourth word
346
347 beqz $a2,.L_bn_mul_words_return # nothing left over
348 nop
349
350.L_bn_mul_words_tail: # handles the remaining 1..3 words
351 .set reorder
352 $LD $t0,0($a1)
353 $MULTU $t0,$a3
354 subu $a2,1
355 mflo $at
356 mfhi $t0
357 $ADDU $v0,$at
358 sltu $t1,$v0,$at
359 $ST $v0,0($a0)
360 $ADDU $v0,$t1,$t0
361 beqz $a2,.L_bn_mul_words_return
362
363 $LD $t0,$BNSZ($a1)
364 $MULTU $t0,$a3
365 subu $a2,1
366 mflo $at
367 mfhi $t0
368 $ADDU $v0,$at
369 sltu $t1,$v0,$at
370 $ST $v0,$BNSZ($a0)
371 $ADDU $v0,$t1,$t0
372 beqz $a2,.L_bn_mul_words_return
373
374 $LD $t0,2*$BNSZ($a1) # third and last possible tail word
375 $MULTU $t0,$a3
376 mflo $at
377 mfhi $t0
378 $ADDU $v0,$at
379 sltu $t1,$v0,$at
380 $ST $v0,2*$BNSZ($a0)
381 $ADDU $v0,$t1,$t0
382
383.L_bn_mul_words_return:
384 .set noreorder
385___
# NUBI only: reload the saved temporaries and release the frame.
386$code.=<<___ if ($flavour =~ /nubi/i);
387 $REG_L $t3,4*$SZREG($sp)
388 $REG_L $t2,3*$SZREG($sp)
389 $REG_L $t1,2*$SZREG($sp)
390 $REG_L $t0,1*$SZREG($sp)
391 $REG_L $gp,0*$SZREG($sp)
392 $PTR_ADD $sp,6*$SZREG
393___
394$code.=<<___;
395 jr $ra
396 move $a0,$v0 # delay slot: final carry is also placed in $a0
397.end bn_mul_words_internal
398
399.align 5
 # bn_sqr_words: for each of $a2 source words at $a1, store the low and
 # high halves of its square as two consecutive words at $a0. $v0 is
 # zeroed in the stub's delay slot and not written afterwards, so the
 # value copied to $a0 on return is 0.
400.globl bn_sqr_words
401.ent bn_sqr_words
402bn_sqr_words:
403 .set noreorder
404 bgtz $a2,bn_sqr_words_internal # count > 0: go do the work
405 move $v0,$zero # delay slot
406 jr $ra
407 move $a0,$v0
408.end bn_sqr_words
409
410.align 5
411.ent bn_sqr_words_internal
412bn_sqr_words_internal:
413___
# NUBI only: save $ra, $t0..$t3 and the register named by $gp.
414$code.=<<___ if ($flavour =~ /nubi/i);
415 .frame $sp,6*$SZREG,$ra
416 .mask 0x8000f008,-$SZREG
417 .set noreorder
418 $PTR_SUB $sp,6*$SZREG
419 $REG_S $ra,5*$SZREG($sp)
420 $REG_S $t3,4*$SZREG($sp)
421 $REG_S $t2,3*$SZREG($sp)
422 $REG_S $t1,2*$SZREG($sp)
423 $REG_S $t0,1*$SZREG($sp)
424 $REG_S $gp,0*$SZREG($sp)
425___
426$code.=<<___;
427 .set reorder
428 li $minus4,-4
429 and $ta0,$a2,$minus4 # count with its two low bits cleared
430 beqz $ta0,.L_bn_sqr_words_tail # fewer than 4 words: only the tail runs
431
432.L_bn_sqr_words_loop: # four source words (eight result words) per iteration
433 $LD $t0,0($a1)
434 $MULTU $t0,$t0 # square of source word 0
435 $LD $t2,$BNSZ($a1)
436 $LD $ta0,2*$BNSZ($a1)
437 $LD $ta2,3*$BNSZ($a1)
438 mflo $t1
439 mfhi $t0
440 $ST $t1,0($a0) # low half
441 $ST $t0,$BNSZ($a0) # high half
442
443 $MULTU $t2,$t2
444 subu $a2,4
445 $PTR_ADD $a0,8*$BNSZ # destination advances by eight words,
446 $PTR_ADD $a1,4*$BNSZ # source by four; later stores use negative offsets
447 mflo $t3
448 mfhi $t2
449 $ST $t3,-6*$BNSZ($a0)
450 $ST $t2,-5*$BNSZ($a0)
451
452 $MULTU $ta0,$ta0
453 mflo $ta1
454 mfhi $ta0
455 $ST $ta1,-4*$BNSZ($a0)
456 $ST $ta0,-3*$BNSZ($a0)
457
458
459 $MULTU $ta2,$ta2
460 and $ta0,$a2,$minus4 # non-zero while at least 4 words remain
461 mflo $ta3
462 mfhi $ta2
463 $ST $ta3,-2*$BNSZ($a0)
464
465 .set noreorder
466 bgtz $ta0,.L_bn_sqr_words_loop
467 $ST $ta2,-$BNSZ($a0) # delay slot: last store of the iteration
468
469 beqz $a2,.L_bn_sqr_words_return # nothing left over
470 nop
471
472.L_bn_sqr_words_tail: # handles the remaining 1..3 words
473 .set reorder
474 $LD $t0,0($a1)
475 $MULTU $t0,$t0
476 subu $a2,1
477 mflo $t1
478 mfhi $t0
479 $ST $t1,0($a0)
480 $ST $t0,$BNSZ($a0)
481 beqz $a2,.L_bn_sqr_words_return
482
483 $LD $t0,$BNSZ($a1)
484 $MULTU $t0,$t0
485 subu $a2,1
486 mflo $t1
487 mfhi $t0
488 $ST $t1,2*$BNSZ($a0)
489 $ST $t0,3*$BNSZ($a0)
490 beqz $a2,.L_bn_sqr_words_return
491
492 $LD $t0,2*$BNSZ($a1) # third and last possible tail word
493 $MULTU $t0,$t0
494 mflo $t1
495 mfhi $t0
496 $ST $t1,4*$BNSZ($a0)
497 $ST $t0,5*$BNSZ($a0)
498
499.L_bn_sqr_words_return:
500 .set noreorder
501___
# NUBI only: reload the saved temporaries and release the frame.
502$code.=<<___ if ($flavour =~ /nubi/i);
503 $REG_L $t3,4*$SZREG($sp)
504 $REG_L $t2,3*$SZREG($sp)
505 $REG_L $t1,2*$SZREG($sp)
506 $REG_L $t0,1*$SZREG($sp)
507 $REG_L $gp,0*$SZREG($sp)
508 $PTR_ADD $sp,6*$SZREG
509___
510$code.=<<___;
511 jr $ra
512 move $a0,$v0 # delay slot
513
514.end bn_sqr_words_internal
515
516.align 5
 # bn_add_words: add $a3 words read from $a1 and $a2 with carry
 # propagation and store the sums at $a0. The carry is kept in $v0 and
 # returned in both $v0 and $a0. The public stub returns 0 when the
 # count is not positive.
517.globl bn_add_words
518.ent bn_add_words
519bn_add_words:
520 .set noreorder
521 bgtz $a3,bn_add_words_internal # count > 0: go do the work
522 move $v0,$zero # delay slot: carry starts at 0
523 jr $ra
524 move $a0,$v0
525.end bn_add_words
526
527.align 5
528.ent bn_add_words_internal
529bn_add_words_internal:
530___
# NUBI only: save $ra, $t0..$t3 and the register named by $gp.
531$code.=<<___ if ($flavour =~ /nubi/i);
532 .frame $sp,6*$SZREG,$ra
533 .mask 0x8000f008,-$SZREG
534 .set noreorder
535 $PTR_SUB $sp,6*$SZREG
536 $REG_S $ra,5*$SZREG($sp)
537 $REG_S $t3,4*$SZREG($sp)
538 $REG_S $t2,3*$SZREG($sp)
539 $REG_S $t1,2*$SZREG($sp)
540 $REG_S $t0,1*$SZREG($sp)
541 $REG_S $gp,0*$SZREG($sp)
542___
543$code.=<<___;
544 .set reorder
545 li $minus4,-4
546 and $at,$a3,$minus4 # count with its two low bits cleared
547 beqz $at,.L_bn_add_words_tail # fewer than 4 words: only the tail runs
548
549.L_bn_add_words_loop: # four words per iteration
550 $LD $t0,0($a1)
551 $LD $ta0,0($a2)
552 subu $a3,4
553 $LD $t1,$BNSZ($a1)
554 and $at,$a3,$minus4 # non-zero while at least 4 words remain; tested at the bottom
555 $LD $t2,2*$BNSZ($a1)
556 $PTR_ADD $a2,4*$BNSZ # pointers advance early, so later accesses use negative offsets
557 $LD $t3,3*$BNSZ($a1)
558 $PTR_ADD $a0,4*$BNSZ
559 $LD $ta1,-3*$BNSZ($a2)
560 $PTR_ADD $a1,4*$BNSZ
561 $LD $ta2,-2*$BNSZ($a2)
562 $LD $ta3,-$BNSZ($a2)
563 $ADDU $ta0,$t0 # sum of the two input words
564 sltu $t8,$ta0,$t0 # carry out of that add
565 $ADDU $t0,$ta0,$v0 # add incoming carry
566 sltu $v0,$t0,$ta0 # carry out of the carry add
567 $ST $t0,-4*$BNSZ($a0)
568 $ADDU $v0,$t8 # combined carry for the next word
569
570 $ADDU $ta1,$t1
571 sltu $t9,$ta1,$t1
572 $ADDU $t1,$ta1,$v0
573 sltu $v0,$t1,$ta1
574 $ST $t1,-3*$BNSZ($a0)
575 $ADDU $v0,$t9
576
577 $ADDU $ta2,$t2
578 sltu $t8,$ta2,$t2
579 $ADDU $t2,$ta2,$v0
580 sltu $v0,$t2,$ta2
581 $ST $t2,-2*$BNSZ($a0)
582 $ADDU $v0,$t8
583
584 $ADDU $ta3,$t3
585 sltu $t9,$ta3,$t3
586 $ADDU $t3,$ta3,$v0
587 sltu $v0,$t3,$ta3
588 $ST $t3,-$BNSZ($a0)
589
590 .set noreorder
591 bgtz $at,.L_bn_add_words_loop
592 $ADDU $v0,$t9 # delay slot: finish the carry for the fourth word
593
594 beqz $a3,.L_bn_add_words_return # nothing left over
595 nop
596
597.L_bn_add_words_tail: # handles the remaining 1..3 words
598 .set reorder
599 $LD $t0,0($a1)
600 $LD $ta0,0($a2)
601 $ADDU $ta0,$t0
602 subu $a3,1
603 sltu $t8,$ta0,$t0
604 $ADDU $t0,$ta0,$v0
605 sltu $v0,$t0,$ta0
606 $ST $t0,0($a0)
607 $ADDU $v0,$t8
608 beqz $a3,.L_bn_add_words_return
609
610 $LD $t1,$BNSZ($a1)
611 $LD $ta1,$BNSZ($a2)
612 $ADDU $ta1,$t1
613 subu $a3,1
614 sltu $t9,$ta1,$t1
615 $ADDU $t1,$ta1,$v0
616 sltu $v0,$t1,$ta1
617 $ST $t1,$BNSZ($a0)
618 $ADDU $v0,$t9
619 beqz $a3,.L_bn_add_words_return
620
621 $LD $t2,2*$BNSZ($a1) # third and last possible tail word
622 $LD $ta2,2*$BNSZ($a2)
623 $ADDU $ta2,$t2
624 sltu $t8,$ta2,$t2
625 $ADDU $t2,$ta2,$v0
626 sltu $v0,$t2,$ta2
627 $ST $t2,2*$BNSZ($a0)
628 $ADDU $v0,$t8
629
630.L_bn_add_words_return:
631 .set noreorder
632___
# NUBI only: reload the saved temporaries and release the frame.
633$code.=<<___ if ($flavour =~ /nubi/i);
634 $REG_L $t3,4*$SZREG($sp)
635 $REG_L $t2,3*$SZREG($sp)
636 $REG_L $t1,2*$SZREG($sp)
637 $REG_L $t0,1*$SZREG($sp)
638 $REG_L $gp,0*$SZREG($sp)
639 $PTR_ADD $sp,6*$SZREG
640___
641$code.=<<___;
642 jr $ra
643 move $a0,$v0 # delay slot: final carry is also placed in $a0
644
645.end bn_add_words_internal
646
647.align 5
 # bn_sub_words: subtract $a3 words read from $a2 from the words read
 # from $a1 with borrow propagation and store the differences at $a0.
 # The borrow is kept in $v0 and returned in both $v0 and $a0. The
 # public stub returns 0 when the count is not positive.
648.globl bn_sub_words
649.ent bn_sub_words
650bn_sub_words:
651 .set noreorder
652 bgtz $a3,bn_sub_words_internal # count > 0: go do the work
653 move $v0,$zero # delay slot: borrow starts at 0
654 jr $ra
655 move $a0,$zero
656.end bn_sub_words
657
658.align 5
659.ent bn_sub_words_internal
660bn_sub_words_internal:
661___
# NUBI only: save $ra, $t0..$t3 and the register named by $gp.
662$code.=<<___ if ($flavour =~ /nubi/i);
663 .frame $sp,6*$SZREG,$ra
664 .mask 0x8000f008,-$SZREG
665 .set noreorder
666 $PTR_SUB $sp,6*$SZREG
667 $REG_S $ra,5*$SZREG($sp)
668 $REG_S $t3,4*$SZREG($sp)
669 $REG_S $t2,3*$SZREG($sp)
670 $REG_S $t1,2*$SZREG($sp)
671 $REG_S $t0,1*$SZREG($sp)
672 $REG_S $gp,0*$SZREG($sp)
673___
674$code.=<<___;
675 .set reorder
676 li $minus4,-4
677 and $at,$a3,$minus4 # count with its two low bits cleared
678 beqz $at,.L_bn_sub_words_tail # fewer than 4 words: only the tail runs
679
680.L_bn_sub_words_loop: # four words per iteration
681 $LD $t0,0($a1)
682 $LD $ta0,0($a2)
683 subu $a3,4
684 $LD $t1,$BNSZ($a1)
685 and $at,$a3,$minus4 # non-zero while at least 4 words remain; tested at the bottom
686 $LD $t2,2*$BNSZ($a1)
687 $PTR_ADD $a2,4*$BNSZ # pointers advance early, so later accesses use negative offsets
688 $LD $t3,3*$BNSZ($a1)
689 $PTR_ADD $a0,4*$BNSZ
690 $LD $ta1,-3*$BNSZ($a2)
691 $PTR_ADD $a1,4*$BNSZ
692 $LD $ta2,-2*$BNSZ($a2)
693 $LD $ta3,-$BNSZ($a2)
694 sltu $t8,$t0,$ta0 # borrow if minuend word < subtrahend word
695 $SUBU $ta0,$t0,$ta0 # raw difference
696 $SUBU $t0,$ta0,$v0 # minus incoming borrow
697 sgtu $v0,$t0,$ta0 # borrow caused by subtracting the borrow
698 $ST $t0,-4*$BNSZ($a0)
699 $ADDU $v0,$t8 # combined borrow for the next word
700
701 sltu $t9,$t1,$ta1
702 $SUBU $ta1,$t1,$ta1
703 $SUBU $t1,$ta1,$v0
704 sgtu $v0,$t1,$ta1
705 $ST $t1,-3*$BNSZ($a0)
706 $ADDU $v0,$t9
707
708
709 sltu $t8,$t2,$ta2
710 $SUBU $ta2,$t2,$ta2
711 $SUBU $t2,$ta2,$v0
712 sgtu $v0,$t2,$ta2
713 $ST $t2,-2*$BNSZ($a0)
714 $ADDU $v0,$t8
715
716 sltu $t9,$t3,$ta3
717 $SUBU $ta3,$t3,$ta3
718 $SUBU $t3,$ta3,$v0
719 sgtu $v0,$t3,$ta3
720 $ST $t3,-$BNSZ($a0)
721
722 .set noreorder
723 bgtz $at,.L_bn_sub_words_loop
724 $ADDU $v0,$t9 # delay slot: finish the borrow for the fourth word
725
726 beqz $a3,.L_bn_sub_words_return # nothing left over
727 nop
728
729.L_bn_sub_words_tail: # handles the remaining 1..3 words
730 .set reorder
731 $LD $t0,0($a1)
732 $LD $ta0,0($a2)
733 subu $a3,1
734 sltu $t8,$t0,$ta0
735 $SUBU $ta0,$t0,$ta0
736 $SUBU $t0,$ta0,$v0
737 sgtu $v0,$t0,$ta0
738 $ST $t0,0($a0)
739 $ADDU $v0,$t8
740 beqz $a3,.L_bn_sub_words_return
741
742 $LD $t1,$BNSZ($a1)
743 subu $a3,1
744 $LD $ta1,$BNSZ($a2)
745 sltu $t9,$t1,$ta1
746 $SUBU $ta1,$t1,$ta1
747 $SUBU $t1,$ta1,$v0
748 sgtu $v0,$t1,$ta1
749 $ST $t1,$BNSZ($a0)
750 $ADDU $v0,$t9
751 beqz $a3,.L_bn_sub_words_return
752
753 $LD $t2,2*$BNSZ($a1) # third and last possible tail word
754 $LD $ta2,2*$BNSZ($a2)
755 sltu $t8,$t2,$ta2
756 $SUBU $ta2,$t2,$ta2
757 $SUBU $t2,$ta2,$v0
758 sgtu $v0,$t2,$ta2
759 $ST $t2,2*$BNSZ($a0)
760 $ADDU $v0,$t8
761
762.L_bn_sub_words_return:
763 .set noreorder
764___
# NUBI only: reload the saved temporaries and release the frame.
765$code.=<<___ if ($flavour =~ /nubi/i);
766 $REG_L $t3,4*$SZREG($sp)
767 $REG_L $t2,3*$SZREG($sp)
768 $REG_L $t1,2*$SZREG($sp)
769 $REG_L $t0,1*$SZREG($sp)
770 $REG_L $gp,0*$SZREG($sp)
771 $PTR_ADD $sp,6*$SZREG
772___
773$code.=<<___;
774 jr $ra
775 move $a0,$v0 # delay slot: final borrow is also placed in $a0
776.end bn_sub_words_internal
777
778.align 5
 # bn_div_3_words: $a0 points at the top word of a three-word value
 # (the words at offsets 0, -1 and -2 words are read), $a1 and $a2 are
 # two further words. When the top word equals $a2 the result is -1
 # (all ones). Otherwise bn_div_words_internal is called on the top two
 # words and $a2, and the resulting estimate in $v0 is adjusted by the
 # loop below. The result is returned in $v0 and $a0.
 # NOTE(review): presumably $a1/$a2 are the two leading divisor words as
 # in bn_asm.c's quotient-digit estimation; confirm against the caller.
779.globl bn_div_3_words
780.ent bn_div_3_words
781bn_div_3_words:
782 .set noreorder
783 move $a3,$a0 # we know that bn_div_words does not
784 # touch $a3, $ta2, $ta3 and preserves $a2
785 # so that we can save two arguments
786 # and return address in registers
787 # instead of stack:-)
788
789 $LD $a0,($a3) # top word
790 move $ta2,$a1 # keep the original $a1 for the correction loop
791 bne $a0,$a2,bn_div_3_words_internal
792 $LD $a1,-$BNSZ($a3) # delay slot: second word, loaded on both paths
793 li $v0,-1 # top word equals $a2: return all ones
794 jr $ra
795 move $a0,$v0
796.end bn_div_3_words
797
798.align 5
799.ent bn_div_3_words_internal
800bn_div_3_words_internal:
801___
# NUBI only: save $ra, $t0..$t3 and the register named by $gp.
802$code.=<<___ if ($flavour =~ /nubi/i);
803 .frame $sp,6*$SZREG,$ra
804 .mask 0x8000f008,-$SZREG
805 .set noreorder
806 $PTR_SUB $sp,6*$SZREG
807 $REG_S $ra,5*$SZREG($sp)
808 $REG_S $t3,4*$SZREG($sp)
809 $REG_S $t2,3*$SZREG($sp)
810 $REG_S $t1,2*$SZREG($sp)
811 $REG_S $t0,1*$SZREG($sp)
812 $REG_S $gp,0*$SZREG($sp)
813___
814$code.=<<___;
815 .set reorder
816 move $ta3,$ra # return address is parked in a register across the call
817 bal bn_div_words_internal
818 move $ra,$ta3
819 $MULTU $ta2,$v0 # saved second argument times the estimate
820 $LD $t2,-2*$BNSZ($a3) # third word
821 move $ta0,$zero
822 mfhi $t1
823 mflo $t0
824 sltu $t8,$t1,$a1 # product high half below $a1 (set by the call): no correction
825.L_bn_div_3_words_inner_loop:
826 bnez $t8,.L_bn_div_3_words_inner_loop_done
827 sgeu $at,$t2,$t0
828 seq $t9,$t1,$a1
829 and $at,$t9 # high halves equal and third word >= low half: last pass
830 sltu $t3,$t0,$ta2 # borrow for the two-word subtraction below
831 $ADDU $a1,$a2
832 $SUBU $t1,$t3
833 $SUBU $t0,$ta2
834 sltu $t8,$t1,$a1
835 sltu $ta0,$a1,$a2 # the add to $a1 wrapped around
836 or $t8,$ta0
837 .set noreorder
838 beqz $at,.L_bn_div_3_words_inner_loop
839 $SUBU $v0,1 # delay slot: decrement the estimate on every pass
840 $ADDU $v0,1 # undo the decrement made on the final pass
841 .set reorder
842.L_bn_div_3_words_inner_loop_done:
843 .set noreorder
844___
# NUBI only: reload the saved temporaries and release the frame.
845$code.=<<___ if ($flavour =~ /nubi/i);
846 $REG_L $t3,4*$SZREG($sp)
847 $REG_L $t2,3*$SZREG($sp)
848 $REG_L $t1,2*$SZREG($sp)
849 $REG_L $t0,1*$SZREG($sp)
850 $REG_L $gp,0*$SZREG($sp)
851 $PTR_ADD $sp,6*$SZREG
852___
853$code.=<<___;
854 jr $ra
855 move $a0,$v0 # delay slot: result is also placed in $a0
856.end bn_div_3_words_internal
857
858.align 5
 # bn_div_words: divide the two-word value $a0 (high) : $a1 (low) by
 # $a2. The quotient is returned in $v0 and $a0, and the remainder is
 # left in $a1. A zero divisor makes the public stub return -1. The
 # internal routine shifts the divisor left until its top bit is set,
 # then produces the quotient as two half-word digits.
859.globl bn_div_words
860.ent bn_div_words
861bn_div_words:
862 .set noreorder
863 bnez $a2,bn_div_words_internal
864 li $v0,-1 # I would rather signal div-by-zero
865 # which can be done with 'break 7'
866 jr $ra
867 move $a0,$v0
868.end bn_div_words
869
870.align 5
871.ent bn_div_words_internal
872bn_div_words_internal:
873___
# NUBI only: save $ra, $t0..$t3 and the register named by $gp.
874$code.=<<___ if ($flavour =~ /nubi/i);
875 .frame $sp,6*$SZREG,$ra
876 .mask 0x8000f008,-$SZREG
877 .set noreorder
878 $PTR_SUB $sp,6*$SZREG
879 $REG_S $ra,5*$SZREG($sp)
880 $REG_S $t3,4*$SZREG($sp)
881 $REG_S $t2,3*$SZREG($sp)
882 $REG_S $t1,2*$SZREG($sp)
883 $REG_S $t0,1*$SZREG($sp)
884 $REG_S $gp,0*$SZREG($sp)
885___
886$code.=<<___;
887 move $v1,$zero
888 bltz $a2,.L_bn_div_words_body # top bit already set: no shifting needed
889 move $t9,$v1 # delay slot: shift count starts at 0
890 $SLL $a2,1
891 bgtz $a2,.-4 # loop back to the shift until the top bit is set
892 addu $t9,1 # delay slot: count the shift
893
894 .set reorder
895 negu $t1,$t9
896 li $t2,-1
897 $SLL $t2,$t1
898 and $t2,$a0 # bits of the high word that the shift would push out
899 $SRL $at,$a1,$t1 # bits of the low word that move into the high word
900 .set noreorder
901 beqz $t2,.+12 # nothing would be lost: skip the break
902 nop
903 break 6 # signal overflow
904 .set reorder
905 $SLL $a0,$t9
906 $SLL $a1,$t9
907 or $a0,$at
908___
# Names for the registers used by the digit estimation: $QT is the
# current quotient digit, $HH the high half of the running value and $DH
# the high half of the shifted divisor.
909$QT=$ta0;
910$HH=$ta1;
911$DH=$v1;
912$code.=<<___;
913.L_bn_div_words_body:
914 $SRL $DH,$a2,4*$BNSZ # bits
915 sgeu $at,$a0,$a2
916 .set noreorder
917 beqz $at,.+12 # high word below the divisor: skip the subtraction
918 nop
919 $SUBU $a0,$a2
920 .set reorder
921
922 li $QT,-1
923 $SRL $HH,$a0,4*$BNSZ # bits
924 $SRL $QT,4*$BNSZ # q=0xffffffff
925 beq $DH,$HH,.L_bn_div_words_skip_div1 # equal high halves: keep the all-ones digit
926 $DIVU $zero,$a0,$DH
927 mflo $QT
928.L_bn_div_words_skip_div1:
929 $MULTU $a2,$QT
930 $SLL $t3,$a0,4*$BNSZ # bits
931 $SRL $at,$a1,4*$BNSZ # bits
932 or $t3,$at # running value shifted up by half a word, next half word of $a1 brought in
933 mflo $t0
934 mfhi $t1
935.L_bn_div_words_inner_loop1: # lower the digit while divisor*digit is too large
936 sltu $t2,$t3,$t0
937 seq $t8,$HH,$t1
938 sltu $at,$HH,$t1
939 and $t2,$t8
940 sltu $v0,$t0,$a2 # borrow for the two-word subtraction below
941 or $at,$t2
942 .set noreorder
943 beqz $at,.L_bn_div_words_inner_loop1_done
944 $SUBU $t1,$v0 # delay slot: executed on both paths
945 $SUBU $t0,$a2
946 b .L_bn_div_words_inner_loop1
947 $SUBU $QT,1 # delay slot: one less in the digit
948 .set reorder
949.L_bn_div_words_inner_loop1_done:
950
951 $SLL $a1,4*$BNSZ # bits
952 $SUBU $a0,$t3,$t0
953 $SLL $v0,$QT,4*$BNSZ # bits
954
955 li $QT,-1
956 $SRL $HH,$a0,4*$BNSZ # bits
957 $SRL $QT,4*$BNSZ # q=0xffffffff
958 beq $DH,$HH,.L_bn_div_words_skip_div2
959 $DIVU $zero,$a0,$DH
960 mflo $QT
961.L_bn_div_words_skip_div2:
962 $MULTU $a2,$QT
963 $SLL $t3,$a0,4*$BNSZ # bits
964 $SRL $at,$a1,4*$BNSZ # bits
965 or $t3,$at
966 mflo $t0
967 mfhi $t1
968.L_bn_div_words_inner_loop2: # same correction for the low digit; $v1 is the scratch borrow here
969 sltu $t2,$t3,$t0
970 seq $t8,$HH,$t1
971 sltu $at,$HH,$t1
972 and $t2,$t8
973 sltu $v1,$t0,$a2
974 or $at,$t2
975 .set noreorder
976 beqz $at,.L_bn_div_words_inner_loop2_done
977 $SUBU $t1,$v1
978 $SUBU $t0,$a2
979 b .L_bn_div_words_inner_loop2
980 $SUBU $QT,1
981 .set reorder
982.L_bn_div_words_inner_loop2_done:
983
984 $SUBU $a0,$t3,$t0
985 or $v0,$QT # combine the high digit (already shifted) with the low digit
986 $SRL $v1,$a0,$t9 # $v1 contains remainder if anybody wants it
987 $SRL $a2,$t9 # restore $a2
988
989 .set noreorder
990 move $a1,$v1 # remainder is handed back in $a1
991___
# NUBI only: reload the saved temporaries and release the frame.
992$code.=<<___ if ($flavour =~ /nubi/i);
993 $REG_L $t3,4*$SZREG($sp)
994 $REG_L $t2,3*$SZREG($sp)
995 $REG_L $t1,2*$SZREG($sp)
996 $REG_L $t0,1*$SZREG($sp)
997 $REG_L $gp,0*$SZREG($sp)
998 $PTR_ADD $sp,6*$SZREG
999___
1000$code.=<<___;
1001 jr $ra
1002 move $a0,$v0 # delay slot: quotient is also placed in $a0
1003.end bn_div_words_internal
1004___
1005undef $HH; undef $QT; undef $DH; # the digit-estimation names are not used past this point
1006
# Register assignment for the comba routines that follow: $a_N and $b_N
# hold the loaded input words a[N] and b[N], $t_1/$t_2 receive the low
# and high halves of each product, and $c_1..$c_3 are the three rotating
# accumulator words.
1007($a_0,$a_1,$a_2,$a_3)=($t0,$t1,$t2,$t3);
1008($b_0,$b_1,$b_2,$b_3)=($ta0,$ta1,$ta2,$ta3);
1009
1010($a_4,$a_5,$a_6,$a_7)=($s0,$s2,$s4,$a1); # once we load a[7], no use for $a1
1011($b_4,$b_5,$b_6,$b_7)=($s1,$s3,$s5,$a2); # once we load b[7], no use for $a2
1012
1013($t_1,$t_2,$c_1,$c_2,$c_3)=($t8,$t9,$v0,$v1,$a3);
1014
1015$code.=<<___;
1016
1017.align 5
1018.globl bn_mul_comba8
1019.ent bn_mul_comba8
1020bn_mul_comba8:
1021 .set noreorder
1022___
1023$code.=<<___ if ($flavour =~ /nubi/i);
1024 .frame $sp,12*$SZREG,$ra
1025 .mask 0x803ff008,-$SZREG
1026 $PTR_SUB $sp,12*$SZREG
1027 $REG_S $ra,11*$SZREG($sp)
1028 $REG_S $s5,10*$SZREG($sp)
1029 $REG_S $s4,9*$SZREG($sp)
1030 $REG_S $s3,8*$SZREG($sp)
1031 $REG_S $s2,7*$SZREG($sp)
1032 $REG_S $s1,6*$SZREG($sp)
1033 $REG_S $s0,5*$SZREG($sp)
1034 $REG_S $t3,4*$SZREG($sp)
1035 $REG_S $t2,3*$SZREG($sp)
1036 $REG_S $t1,2*$SZREG($sp)
1037 $REG_S $t0,1*$SZREG($sp)
1038 $REG_S $gp,0*$SZREG($sp)
1039___
1040$code.=<<___ if ($flavour !~ /nubi/i);
1041 .frame $sp,6*$SZREG,$ra
1042 .mask 0x003f0000,-$SZREG
1043 $PTR_SUB $sp,6*$SZREG
1044 $REG_S $s5,5*$SZREG($sp)
1045 $REG_S $s4,4*$SZREG($sp)
1046 $REG_S $s3,3*$SZREG($sp)
1047 $REG_S $s2,2*$SZREG($sp)
1048 $REG_S $s1,1*$SZREG($sp)
1049 $REG_S $s0,0*$SZREG($sp)
1050___
1051$code.=<<___;
1052
1053 .set reorder
1054 $LD $a_0,0($a1) # If compiled with -mips3 option on
1055 # R5000 box assembler barks on this
1056 # line with "should not have mult/div
1057 # as last instruction in bb (R10K
1058 # bug)" warning. If anybody out there
1059 # has a clue about how to circumvent
1060 # this do send me a note.
1061 # <appro\@fy.chalmers.se>
1062
1063 $LD $b_0,0($a2)
1064 $LD $a_1,$BNSZ($a1)
1065 $LD $a_2,2*$BNSZ($a1)
1066 $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3);
1067 $LD $a_3,3*$BNSZ($a1)
1068 $LD $b_1,$BNSZ($a2)
1069 $LD $b_2,2*$BNSZ($a2)
1070 $LD $b_3,3*$BNSZ($a2)
1071 mflo $c_1
1072 mfhi $c_2
1073
1074 $LD $a_4,4*$BNSZ($a1)
1075 $LD $a_5,5*$BNSZ($a1)
1076 $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1);
1077 $LD $a_6,6*$BNSZ($a1)
1078 $LD $a_7,7*$BNSZ($a1)
1079 $LD $b_4,4*$BNSZ($a2)
1080 $LD $b_5,5*$BNSZ($a2)
1081 mflo $t_1
1082 mfhi $t_2
1083 $ADDU $c_2,$t_1
1084 sltu $at,$c_2,$t_1
1085 $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1);
1086 $ADDU $c_3,$t_2,$at
1087 $LD $b_6,6*$BNSZ($a2)
1088 $LD $b_7,7*$BNSZ($a2)
1089 $ST $c_1,0($a0) # r[0]=c1;
1090 mflo $t_1
1091 mfhi $t_2
1092 $ADDU $c_2,$t_1
1093 sltu $at,$c_2,$t_1
1094 $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2);
1095 $ADDU $t_2,$at
1096 $ADDU $c_3,$t_2
1097 sltu $c_1,$c_3,$t_2
1098 $ST $c_2,$BNSZ($a0) # r[1]=c2;
1099
1100 mflo $t_1
1101 mfhi $t_2
1102 $ADDU $c_3,$t_1
1103 sltu $at,$c_3,$t_1
1104 $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2);
1105 $ADDU $t_2,$at
1106 $ADDU $c_1,$t_2
1107 mflo $t_1
1108 mfhi $t_2
1109 $ADDU $c_3,$t_1
1110 sltu $at,$c_3,$t_1
1111 $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2);
1112 $ADDU $t_2,$at
1113 $ADDU $c_1,$t_2
1114 sltu $c_2,$c_1,$t_2
1115 mflo $t_1
1116 mfhi $t_2
1117 $ADDU $c_3,$t_1
1118 sltu $at,$c_3,$t_1
1119 $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3);
1120 $ADDU $t_2,$at
1121 $ADDU $c_1,$t_2
1122 sltu $at,$c_1,$t_2
1123 $ADDU $c_2,$at
1124 $ST $c_3,2*$BNSZ($a0) # r[2]=c3;
1125
1126 mflo $t_1
1127 mfhi $t_2
1128 $ADDU $c_1,$t_1
1129 sltu $at,$c_1,$t_1
1130 $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3);
1131 $ADDU $t_2,$at
1132 $ADDU $c_2,$t_2
1133 sltu $c_3,$c_2,$t_2
1134 mflo $t_1
1135 mfhi $t_2
1136 $ADDU $c_1,$t_1
1137 sltu $at,$c_1,$t_1
1138 $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3);
1139 $ADDU $t_2,$at
1140 $ADDU $c_2,$t_2
1141 sltu $at,$c_2,$t_2
1142 $ADDU $c_3,$at
1143 mflo $t_1
1144 mfhi $t_2
1145 $ADDU $c_1,$t_1
1146 sltu $at,$c_1,$t_1
1147 $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3);
1148 $ADDU $t_2,$at
1149 $ADDU $c_2,$t_2
1150 sltu $at,$c_2,$t_2
1151 $ADDU $c_3,$at
1152 mflo $t_1
1153 mfhi $t_2
1154 $ADDU $c_1,$t_1
1155 sltu $at,$c_1,$t_1
1156 $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1);
1157 $ADDU $t_2,$at
1158 $ADDU $c_2,$t_2
1159 sltu $at,$c_2,$t_2
1160 $ADDU $c_3,$at
1161 $ST $c_1,3*$BNSZ($a0) # r[3]=c1;
1162
1163 mflo $t_1
1164 mfhi $t_2
1165 $ADDU $c_2,$t_1
1166 sltu $at,$c_2,$t_1
1167 $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1);
1168 $ADDU $t_2,$at
1169 $ADDU $c_3,$t_2
1170 sltu $c_1,$c_3,$t_2
1171 mflo $t_1
1172 mfhi $t_2
1173 $ADDU $c_2,$t_1
1174 sltu $at,$c_2,$t_1
1175 $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1);
1176 $ADDU $t_2,$at
1177 $ADDU $c_3,$t_2
1178 sltu $at,$c_3,$t_2
1179 $ADDU $c_1,$at
1180 mflo $t_1
1181 mfhi $t_2
1182 $ADDU $c_2,$t_1
1183 sltu $at,$c_2,$t_1
1184 $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1);
1185 $ADDU $t_2,$at
1186 $ADDU $c_3,$t_2
1187 sltu $at,$c_3,$t_2
1188 $ADDU $c_1,$at
1189 mflo $t_1
1190 mfhi $t_2
1191 $ADDU $c_2,$t_1
1192 sltu $at,$c_2,$t_1
1193 $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1);
1194 $ADDU $t_2,$at
1195 $ADDU $c_3,$t_2
1196 sltu $at,$c_3,$t_2
1197 $ADDU $c_1,$at
1198 mflo $t_1
1199 mfhi $t_2
1200 $ADDU $c_2,$t_1
1201 sltu $at,$c_2,$t_1
1202 $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2);
1203 $ADDU $t_2,$at
1204 $ADDU $c_3,$t_2
1205 sltu $at,$c_3,$t_2
1206 $ADDU $c_1,$at
1207 $ST $c_2,4*$BNSZ($a0) # r[4]=c2;
1208
1209 mflo $t_1
1210 mfhi $t_2
1211 $ADDU $c_3,$t_1
1212 sltu $at,$c_3,$t_1
1213 $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2);
1214 $ADDU $t_2,$at
1215 $ADDU $c_1,$t_2
1216 sltu $c_2,$c_1,$t_2
1217 mflo $t_1
1218 mfhi $t_2
1219 $ADDU $c_3,$t_1
1220 sltu $at,$c_3,$t_1
1221 $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2);
1222 $ADDU $t_2,$at
1223 $ADDU $c_1,$t_2
1224 sltu $at,$c_1,$t_2
1225 $ADDU $c_2,$at
1226 mflo $t_1
1227 mfhi $t_2
1228 $ADDU $c_3,$t_1
1229 sltu $at,$c_3,$t_1
1230 $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2);
1231 $ADDU $t_2,$at
1232 $ADDU $c_1,$t_2
1233 sltu $at,$c_1,$t_2
1234 $ADDU $c_2,$at
1235 mflo $t_1
1236 mfhi $t_2
1237 $ADDU $c_3,$t_1
1238 sltu $at,$c_3,$t_1
1239 $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2);
1240 $ADDU $t_2,$at
1241 $ADDU $c_1,$t_2
1242 sltu $at,$c_1,$t_2
1243 $ADDU $c_2,$at
1244 mflo $t_1
1245 mfhi $t_2
1246 $ADDU $c_3,$t_1
1247 sltu $at,$c_3,$t_1
1248 $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2);
1249 $ADDU $t_2,$at
1250 $ADDU $c_1,$t_2
1251 sltu $at,$c_1,$t_2
1252 $ADDU $c_2,$at
1253 mflo $t_1
1254 mfhi $t_2
1255 $ADDU $c_3,$t_1
1256 sltu $at,$c_3,$t_1
1257 $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3);
1258 $ADDU $t_2,$at
1259 $ADDU $c_1,$t_2
1260 sltu $at,$c_1,$t_2
1261 $ADDU $c_2,$at
1262 $ST $c_3,5*$BNSZ($a0) # r[5]=c3;
1263
1264 mflo $t_1
1265 mfhi $t_2
1266 $ADDU $c_1,$t_1
1267 sltu $at,$c_1,$t_1
1268 $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3);
1269 $ADDU $t_2,$at
1270 $ADDU $c_2,$t_2
1271 sltu $c_3,$c_2,$t_2
1272 mflo $t_1
1273 mfhi $t_2
1274 $ADDU $c_1,$t_1
1275 sltu $at,$c_1,$t_1
1276 $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3);
1277 $ADDU $t_2,$at
1278 $ADDU $c_2,$t_2
1279 sltu $at,$c_2,$t_2
1280 $ADDU $c_3,$at
1281 mflo $t_1
1282 mfhi $t_2
1283 $ADDU $c_1,$t_1
1284 sltu $at,$c_1,$t_1
1285 $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3);
1286 $ADDU $t_2,$at
1287 $ADDU $c_2,$t_2
1288 sltu $at,$c_2,$t_2
1289 $ADDU $c_3,$at
1290 mflo $t_1
1291 mfhi $t_2
1292 $ADDU $c_1,$t_1
1293 sltu $at,$c_1,$t_1
1294 $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3);
1295 $ADDU $t_2,$at
1296 $ADDU $c_2,$t_2
1297 sltu $at,$c_2,$t_2
1298 $ADDU $c_3,$at
1299 mflo $t_1
1300 mfhi $t_2
1301 $ADDU $c_1,$t_1
1302 sltu $at,$c_1,$t_1
1303 $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3);
1304 $ADDU $t_2,$at
1305 $ADDU $c_2,$t_2
1306 sltu $at,$c_2,$t_2
1307 $ADDU $c_3,$at
1308 mflo $t_1
1309 mfhi $t_2
1310 $ADDU $c_1,$t_1
1311 sltu $at,$c_1,$t_1
1312 $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3);
1313 $ADDU $t_2,$at
1314 $ADDU $c_2,$t_2
1315 sltu $at,$c_2,$t_2
1316 $ADDU $c_3,$at
1317 mflo $t_1
1318 mfhi $t_2
1319 $ADDU $c_1,$t_1
1320 sltu $at,$c_1,$t_1
1321 $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1);
1322 $ADDU $t_2,$at
1323 $ADDU $c_2,$t_2
1324 sltu $at,$c_2,$t_2
1325 $ADDU $c_3,$at
1326 $ST $c_1,6*$BNSZ($a0) # r[6]=c1;
1327
1328 mflo $t_1
1329 mfhi $t_2
1330 $ADDU $c_2,$t_1
1331 sltu $at,$c_2,$t_1
1332 $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1);
1333 $ADDU $t_2,$at
1334 $ADDU $c_3,$t_2
1335 sltu $c_1,$c_3,$t_2
1336 mflo $t_1
1337 mfhi $t_2
1338 $ADDU $c_2,$t_1
1339 sltu $at,$c_2,$t_1
1340 $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1);
1341 $ADDU $t_2,$at
1342 $ADDU $c_3,$t_2
1343 sltu $at,$c_3,$t_2
1344 $ADDU $c_1,$at
1345 mflo $t_1
1346 mfhi $t_2
1347 $ADDU $c_2,$t_1
1348 sltu $at,$c_2,$t_1
1349 $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1);
1350 $ADDU $t_2,$at
1351 $ADDU $c_3,$t_2
1352 sltu $at,$c_3,$t_2
1353 $ADDU $c_1,$at
1354 mflo $t_1
1355 mfhi $t_2
1356 $ADDU $c_2,$t_1
1357 sltu $at,$c_2,$t_1
1358 $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1);
1359 $ADDU $t_2,$at
1360 $ADDU $c_3,$t_2
1361 sltu $at,$c_3,$t_2
1362 $ADDU $c_1,$at
1363 mflo $t_1
1364 mfhi $t_2
1365 $ADDU $c_2,$t_1
1366 sltu $at,$c_2,$t_1
1367 $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1);
1368 $ADDU $t_2,$at
1369 $ADDU $c_3,$t_2
1370 sltu $at,$c_3,$t_2
1371 $ADDU $c_1,$at
1372 mflo $t_1
1373 mfhi $t_2
1374 $ADDU $c_2,$t_1
1375 sltu $at,$c_2,$t_1
1376 $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1);
1377 $ADDU $t_2,$at
1378 $ADDU $c_3,$t_2
1379 sltu $at,$c_3,$t_2
1380 $ADDU $c_1,$at
1381 mflo $t_1
1382 mfhi $t_2
1383 $ADDU $c_2,$t_1
1384 sltu $at,$c_2,$t_1
1385 $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1);
1386 $ADDU $t_2,$at
1387 $ADDU $c_3,$t_2
1388 sltu $at,$c_3,$t_2
1389 $ADDU $c_1,$at
1390 mflo $t_1
1391 mfhi $t_2
1392 $ADDU $c_2,$t_1
1393 sltu $at,$c_2,$t_1
1394 $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2);
1395 $ADDU $t_2,$at
1396 $ADDU $c_3,$t_2
1397 sltu $at,$c_3,$t_2
1398 $ADDU $c_1,$at
1399 $ST $c_2,7*$BNSZ($a0) # r[7]=c2;
1400
1401 mflo $t_1
1402 mfhi $t_2
1403 $ADDU $c_3,$t_1
1404 sltu $at,$c_3,$t_1
1405 $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2);
1406 $ADDU $t_2,$at
1407 $ADDU $c_1,$t_2
1408 sltu $c_2,$c_1,$t_2
1409 mflo $t_1
1410 mfhi $t_2
1411 $ADDU $c_3,$t_1
1412 sltu $at,$c_3,$t_1
1413 $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2);
1414 $ADDU $t_2,$at
1415 $ADDU $c_1,$t_2
1416 sltu $at,$c_1,$t_2
1417 $ADDU $c_2,$at
1418 mflo $t_1
1419 mfhi $t_2
1420 $ADDU $c_3,$t_1
1421 sltu $at,$c_3,$t_1
1422 $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2);
1423 $ADDU $t_2,$at
1424 $ADDU $c_1,$t_2
1425 sltu $at,$c_1,$t_2
1426 $ADDU $c_2,$at
1427 mflo $t_1
1428 mfhi $t_2
1429 $ADDU $c_3,$t_1
1430 sltu $at,$c_3,$t_1
1431 $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2);
1432 $ADDU $t_2,$at
1433 $ADDU $c_1,$t_2
1434 sltu $at,$c_1,$t_2
1435 $ADDU $c_2,$at
1436 mflo $t_1
1437 mfhi $t_2
1438 $ADDU $c_3,$t_1
1439 sltu $at,$c_3,$t_1
1440 $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2);
1441 $ADDU $t_2,$at
1442 $ADDU $c_1,$t_2
1443 sltu $at,$c_1,$t_2
1444 $ADDU $c_2,$at
1445 mflo $t_1
1446 mfhi $t_2
1447 $ADDU $c_3,$t_1
1448 sltu $at,$c_3,$t_1
1449 $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2);
1450 $ADDU $t_2,$at
1451 $ADDU $c_1,$t_2
1452 sltu $at,$c_1,$t_2
1453 $ADDU $c_2,$at
1454 mflo $t_1
1455 mfhi $t_2
1456 $ADDU $c_3,$t_1
1457 sltu $at,$c_3,$t_1
1458 $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3);
1459 $ADDU $t_2,$at
1460 $ADDU $c_1,$t_2
1461 sltu $at,$c_1,$t_2
1462 $ADDU $c_2,$at
1463 $ST $c_3,8*$BNSZ($a0) # r[8]=c3;
1464
1465 mflo $t_1
1466 mfhi $t_2
1467 $ADDU $c_1,$t_1
1468 sltu $at,$c_1,$t_1
1469 $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3);
1470 $ADDU $t_2,$at
1471 $ADDU $c_2,$t_2
1472 sltu $c_3,$c_2,$t_2
1473 mflo $t_1
1474 mfhi $t_2
1475 $ADDU $c_1,$t_1
1476 sltu $at,$c_1,$t_1
1477 $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3);
1478 $ADDU $t_2,$at
1479 $ADDU $c_2,$t_2
1480 sltu $at,$c_2,$t_2
1481 $ADDU $c_3,$at
1482 mflo $t_1
1483 mfhi $t_2
1484 $ADDU $c_1,$t_1
1485 sltu $at,$c_1,$t_1
1486 $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3);
1487 $ADDU $t_2,$at
1488 $ADDU $c_2,$t_2
1489 sltu $at,$c_2,$t_2
1490 $ADDU $c_3,$at
1491 mflo $t_1
1492 mfhi $t_2
1493 $ADDU $c_1,$t_1
1494 sltu $at,$c_1,$t_1
1495 $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3);
1496 $ADDU $t_2,$at
1497 $ADDU $c_2,$t_2
1498 sltu $at,$c_2,$t_2
1499 $ADDU $c_3,$at
1500 mflo $t_1
1501 mfhi $t_2
1502 $ADDU $c_1,$t_1
1503 sltu $at,$c_1,$t_1
1504 $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3);
1505 $ADDU $t_2,$at
1506 $ADDU $c_2,$t_2
1507 sltu $at,$c_2,$t_2
1508 $ADDU $c_3,$at
1509 mflo $t_1
1510 mfhi $t_2
1511 $ADDU $c_1,$t_1
1512 sltu $at,$c_1,$t_1
1513 $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1);
1514 $ADDU $t_2,$at
1515 $ADDU $c_2,$t_2
1516 sltu $at,$c_2,$t_2
1517 $ADDU $c_3,$at
1518 $ST $c_1,9*$BNSZ($a0) # r[9]=c1;
1519
1520 mflo $t_1
1521 mfhi $t_2
1522 $ADDU $c_2,$t_1
1523 sltu $at,$c_2,$t_1
1524 $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1);
1525 $ADDU $t_2,$at
1526 $ADDU $c_3,$t_2
1527 sltu $c_1,$c_3,$t_2
1528 mflo $t_1
1529 mfhi $t_2
1530 $ADDU $c_2,$t_1
1531 sltu $at,$c_2,$t_1
1532 $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1);
1533 $ADDU $t_2,$at
1534 $ADDU $c_3,$t_2
1535 sltu $at,$c_3,$t_2
1536 $ADDU $c_1,$at
1537 mflo $t_1
1538 mfhi $t_2
1539 $ADDU $c_2,$t_1
1540 sltu $at,$c_2,$t_1
1541 $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1);
1542 $ADDU $t_2,$at
1543 $ADDU $c_3,$t_2
1544 sltu $at,$c_3,$t_2
1545 $ADDU $c_1,$at
1546 mflo $t_1
1547 mfhi $t_2
1548 $ADDU $c_2,$t_1
1549 sltu $at,$c_2,$t_1
1550 $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1);
1551 $ADDU $t_2,$at
1552 $ADDU $c_3,$t_2
1553 sltu $at,$c_3,$t_2
1554 $ADDU $c_1,$at
1555 mflo $t_1
1556 mfhi $t_2
1557 $ADDU $c_2,$t_1
1558 sltu $at,$c_2,$t_1
1559 $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2);
1560 $ADDU $t_2,$at
1561 $ADDU $c_3,$t_2
1562 sltu $at,$c_3,$t_2
1563 $ADDU $c_1,$at
1564 $ST $c_2,10*$BNSZ($a0) # r[10]=c2;
1565
1566 mflo $t_1
1567 mfhi $t_2
1568 $ADDU $c_3,$t_1
1569 sltu $at,$c_3,$t_1
1570 $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2);
1571 $ADDU $t_2,$at
1572 $ADDU $c_1,$t_2
1573 sltu $c_2,$c_1,$t_2
1574 mflo $t_1
1575 mfhi $t_2
1576 $ADDU $c_3,$t_1
1577 sltu $at,$c_3,$t_1
1578 $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2);
1579 $ADDU $t_2,$at
1580 $ADDU $c_1,$t_2
1581 sltu $at,$c_1,$t_2
1582 $ADDU $c_2,$at
1583 mflo $t_1
1584 mfhi $t_2
1585 $ADDU $c_3,$t_1
1586 sltu $at,$c_3,$t_1
1587 $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2);
1588 $ADDU $t_2,$at
1589 $ADDU $c_1,$t_2
1590 sltu $at,$c_1,$t_2
1591 $ADDU $c_2,$at
1592 mflo $t_1
1593 mfhi $t_2
1594 $ADDU $c_3,$t_1
1595 sltu $at,$c_3,$t_1
1596 $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3);
1597 $ADDU $t_2,$at
1598 $ADDU $c_1,$t_2
1599 sltu $at,$c_1,$t_2
1600 $ADDU $c_2,$at
1601 $ST $c_3,11*$BNSZ($a0) # r[11]=c3;
1602
1603 mflo $t_1
1604 mfhi $t_2
1605 $ADDU $c_1,$t_1
1606 sltu $at,$c_1,$t_1
1607 $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3);
1608 $ADDU $t_2,$at
1609 $ADDU $c_2,$t_2
1610 sltu $c_3,$c_2,$t_2
1611 mflo $t_1
1612 mfhi $t_2
1613 $ADDU $c_1,$t_1
1614 sltu $at,$c_1,$t_1
1615 $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3);
1616 $ADDU $t_2,$at
1617 $ADDU $c_2,$t_2
1618 sltu $at,$c_2,$t_2
1619 $ADDU $c_3,$at
1620 mflo $t_1
1621 mfhi $t_2
1622 $ADDU $c_1,$t_1
1623 sltu $at,$c_1,$t_1
1624 $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1);
1625 $ADDU $t_2,$at
1626 $ADDU $c_2,$t_2
1627 sltu $at,$c_2,$t_2
1628 $ADDU $c_3,$at
1629 $ST $c_1,12*$BNSZ($a0) # r[12]=c1;
1630
1631 mflo $t_1
1632 mfhi $t_2
1633 $ADDU $c_2,$t_1
1634 sltu $at,$c_2,$t_1
1635 $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1);
1636 $ADDU $t_2,$at
1637 $ADDU $c_3,$t_2
1638 sltu $c_1,$c_3,$t_2
1639 mflo $t_1
1640 mfhi $t_2
1641 $ADDU $c_2,$t_1
1642 sltu $at,$c_2,$t_1
1643 $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2);
1644 $ADDU $t_2,$at
1645 $ADDU $c_3,$t_2
1646 sltu $at,$c_3,$t_2
1647 $ADDU $c_1,$at
1648 $ST $c_2,13*$BNSZ($a0) # r[13]=c2;
1649
1650 mflo $t_1
1651 mfhi $t_2
1652 $ADDU $c_3,$t_1
1653 sltu $at,$c_3,$t_1
1654 $ADDU $t_2,$at
1655 $ADDU $c_1,$t_2
1656 $ST $c_3,14*$BNSZ($a0) # r[14]=c3;
1657 $ST $c_1,15*$BNSZ($a0) # r[15]=c1;
1658
1659 .set noreorder
1660___
# bn_mul_comba8 epilogue, NUBI flavour: reload $s5..$s0, $t3..$t0 and $gp
# from the frame, return, and release the 12*$SZREG frame.  The preceding
# heredoc ends with ".set noreorder", so the $PTR_ADD written after "jr $ra"
# is presumably intended to sit in the jump delay slot.
1661$code.=<<___ if ($flavour =~ /nubi/i);
1662 $REG_L $s5,10*$SZREG($sp)
1663 $REG_L $s4,9*$SZREG($sp)
1664 $REG_L $s3,8*$SZREG($sp)
1665 $REG_L $s2,7*$SZREG($sp)
1666 $REG_L $s1,6*$SZREG($sp)
1667 $REG_L $s0,5*$SZREG($sp)
1668 $REG_L $t3,4*$SZREG($sp)
1669 $REG_L $t2,3*$SZREG($sp)
1670 $REG_L $t1,2*$SZREG($sp)
1671 $REG_L $t0,1*$SZREG($sp)
1672 $REG_L $gp,0*$SZREG($sp)
1673 jr $ra
1674 $PTR_ADD $sp,12*$SZREG
1675___
# Every other flavour: only $s5..$s0 are reloaded and the frame is 6*$SZREG.
1676$code.=<<___ if ($flavour !~ /nubi/i);
1677 $REG_L $s5,5*$SZREG($sp)
1678 $REG_L $s4,4*$SZREG($sp)
1679 $REG_L $s3,3*$SZREG($sp)
1680 $REG_L $s2,2*$SZREG($sp)
1681 $REG_L $s1,1*$SZREG($sp)
1682 $REG_L $s0,0*$SZREG($sp)
1683 jr $ra
1684 $PTR_ADD $sp,6*$SZREG
1685___
# Close bn_mul_comba8 (.end) and open bn_mul_comba4: alignment, global
# symbol, .ent and the entry label.
1686$code.=<<___;
1687.end bn_mul_comba8
1688
1689.align 5
1690.globl bn_mul_comba4
1691.ent bn_mul_comba4
1692bn_mul_comba4:
1693___
# NUBI flavour only: open a 6*$SZREG frame and save $ra, $t3..$t0 and $gp.
1694$code.=<<___ if ($flavour =~ /nubi/i);
1695 .frame $sp,6*$SZREG,$ra
1696 .mask 0x8000f008,-$SZREG
1697 .set noreorder
1698 $PTR_SUB $sp,6*$SZREG
1699 $REG_S $ra,5*$SZREG($sp)
1700 $REG_S $t3,4*$SZREG($sp)
1701 $REG_S $t2,3*$SZREG($sp)
1702 $REG_S $t1,2*$SZREG($sp)
1703 $REG_S $t0,1*$SZREG($sp)
1704 $REG_S $gp,0*$SZREG($sp)
1705___
# Common body.  Loads a[0..3] from ($a1) and b[0..3] from ($a2), then emits
# the fully unrolled product column by column, storing r[0]..r[7] through
# $a0.  ($c_1,$c_2,$c_3) rotate roles as the running accumulator; each
# $MULTU is issued before the adds that finish folding in the previous
# product, and its trailing "mul_add_c(...)" remark names the product that
# is being started (it is consumed by the following mflo/mfhi pair).
# Where a "sltu" is written directly into a $c_N register (instead of into
# $at followed by an $ADDU), that register is being (re)initialised with the
# carry rather than accumulated into.
1706$code.=<<___;
1707 .set reorder
1708 $LD $a_0,0($a1)
1709 $LD $b_0,0($a2)
1710 $LD $a_1,$BNSZ($a1)
1711 $LD $a_2,2*$BNSZ($a1)
1712 $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3);
1713 $LD $a_3,3*$BNSZ($a1)
1714 $LD $b_1,$BNSZ($a2)
1715 $LD $b_2,2*$BNSZ($a2)
1716 $LD $b_3,3*$BNSZ($a2)
1717 mflo $c_1
1718 mfhi $c_2
1719 $ST $c_1,0($a0)
1720
1721 $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1);
1722 mflo $t_1
1723 mfhi $t_2
1724 $ADDU $c_2,$t_1
1725 sltu $at,$c_2,$t_1
1726 $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1);
1727 $ADDU $c_3,$t_2,$at
1728 mflo $t_1
1729 mfhi $t_2
1730 $ADDU $c_2,$t_1
1731 sltu $at,$c_2,$t_1
1732 $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2);
1733 $ADDU $t_2,$at
1734 $ADDU $c_3,$t_2
1735 sltu $c_1,$c_3,$t_2
1736 $ST $c_2,$BNSZ($a0)
1737
1738 mflo $t_1
1739 mfhi $t_2
1740 $ADDU $c_3,$t_1
1741 sltu $at,$c_3,$t_1
1742 $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2);
1743 $ADDU $t_2,$at
1744 $ADDU $c_1,$t_2
1745 mflo $t_1
1746 mfhi $t_2
1747 $ADDU $c_3,$t_1
1748 sltu $at,$c_3,$t_1
1749 $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2);
1750 $ADDU $t_2,$at
1751 $ADDU $c_1,$t_2
1752 sltu $c_2,$c_1,$t_2
1753 mflo $t_1
1754 mfhi $t_2
1755 $ADDU $c_3,$t_1
1756 sltu $at,$c_3,$t_1
1757 $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3);
1758 $ADDU $t_2,$at
1759 $ADDU $c_1,$t_2
1760 sltu $at,$c_1,$t_2
1761 $ADDU $c_2,$at
1762 $ST $c_3,2*$BNSZ($a0)
1763
1764 mflo $t_1
1765 mfhi $t_2
1766 $ADDU $c_1,$t_1
1767 sltu $at,$c_1,$t_1
1768 $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3);
1769 $ADDU $t_2,$at
1770 $ADDU $c_2,$t_2
1771 sltu $c_3,$c_2,$t_2
1772 mflo $t_1
1773 mfhi $t_2
1774 $ADDU $c_1,$t_1
1775 sltu $at,$c_1,$t_1
1776 $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3);
1777 $ADDU $t_2,$at
1778 $ADDU $c_2,$t_2
1779 sltu $at,$c_2,$t_2
1780 $ADDU $c_3,$at
1781 mflo $t_1
1782 mfhi $t_2
1783 $ADDU $c_1,$t_1
1784 sltu $at,$c_1,$t_1
1785 $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3);
1786 $ADDU $t_2,$at
1787 $ADDU $c_2,$t_2
1788 sltu $at,$c_2,$t_2
1789 $ADDU $c_3,$at
1790 mflo $t_1
1791 mfhi $t_2
1792 $ADDU $c_1,$t_1
1793 sltu $at,$c_1,$t_1
1794 $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1);
1795 $ADDU $t_2,$at
1796 $ADDU $c_2,$t_2
1797 sltu $at,$c_2,$t_2
1798 $ADDU $c_3,$at
1799 $ST $c_1,3*$BNSZ($a0)
1800
1801 mflo $t_1
1802 mfhi $t_2
1803 $ADDU $c_2,$t_1
1804 sltu $at,$c_2,$t_1
1805 $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1);
1806 $ADDU $t_2,$at
1807 $ADDU $c_3,$t_2
1808 sltu $c_1,$c_3,$t_2
1809 mflo $t_1
1810 mfhi $t_2
1811 $ADDU $c_2,$t_1
1812 sltu $at,$c_2,$t_1
1813 $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1);
1814 $ADDU $t_2,$at
1815 $ADDU $c_3,$t_2
1816 sltu $at,$c_3,$t_2
1817 $ADDU $c_1,$at
1818 mflo $t_1
1819 mfhi $t_2
1820 $ADDU $c_2,$t_1
1821 sltu $at,$c_2,$t_1
1822 $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2);
1823 $ADDU $t_2,$at
1824 $ADDU $c_3,$t_2
1825 sltu $at,$c_3,$t_2
1826 $ADDU $c_1,$at
1827 $ST $c_2,4*$BNSZ($a0)
1828
1829 mflo $t_1
1830 mfhi $t_2
1831 $ADDU $c_3,$t_1
1832 sltu $at,$c_3,$t_1
1833 $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2);
1834 $ADDU $t_2,$at
1835 $ADDU $c_1,$t_2
1836 sltu $c_2,$c_1,$t_2
1837 mflo $t_1
1838 mfhi $t_2
1839 $ADDU $c_3,$t_1
1840 sltu $at,$c_3,$t_1
1841 $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3);
1842 $ADDU $t_2,$at
1843 $ADDU $c_1,$t_2
1844 sltu $at,$c_1,$t_2
1845 $ADDU $c_2,$at
1846 $ST $c_3,5*$BNSZ($a0)
1847
1848 mflo $t_1
1849 mfhi $t_2
1850 $ADDU $c_1,$t_1
1851 sltu $at,$c_1,$t_1
1852 $ADDU $t_2,$at
1853 $ADDU $c_2,$t_2
1854 $ST $c_1,6*$BNSZ($a0)
1855 $ST $c_2,7*$BNSZ($a0)
1856
1857 .set noreorder
1858___
# NUBI flavour only: reload $t3..$t0 and $gp and release the frame.  $ra was
# stored by the prologue but is not reloaded here.
1859$code.=<<___ if ($flavour =~ /nubi/i);
1860 $REG_L $t3,4*$SZREG($sp)
1861 $REG_L $t2,3*$SZREG($sp)
1862 $REG_L $t1,2*$SZREG($sp)
1863 $REG_L $t0,1*$SZREG($sp)
1864 $REG_L $gp,0*$SZREG($sp)
1865 $PTR_ADD $sp,6*$SZREG
1866___
# Return; ".set noreorder" is in effect, so the explicit nop follows the jr.
1867$code.=<<___;
1868 jr $ra
1869 nop
1870.end bn_mul_comba4
1871___
1872
# From here on $a_4..$a_7 name the same registers as $b_0..$b_3.  The
# squaring routines below take no b[] operand and load a[4]..a[7] into these
# names instead.
1873($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);
1874
# add_c2: append to $code the instruction sequence that takes the product
# currently pending in HI/LO, adds it TWICE to the three-word accumulator
# ($c0 lowest, $c1, $c2 highest), and starts the next multiplication
# $an * $bn so that it overlaps with the carry handling.
#
# Arguments: $hi,$lo are scratch registers receiving mfhi/mflo; $c0,$c1,$c2
# are the accumulator registers; $warm, $an, $bn are described inline below.
# $at is used as an additional scratch register.
#
# Emitted arithmetic: $lo is added to $c0 twice (carries captured in $at and
# in $lo); $at+$hi and then $hi+carry are added to $c1.  With !$warm the
# first carry out of $c1 is written to $c2 with sltu (overwriting it); with
# $warm both carries are added to the existing $c2.
#
# The sub is declared with an empty prototype; the callers in this file use
# the "&add_c2(...)" call form.
1875sub add_c2 () {
1876my ($hi,$lo,$c0,$c1,$c2,
1877 $warm, # !$warm denotes first call with specific sequence of
1878 # $c_[XYZ] when there is no Z-carry to accumulate yet;
1879 $an,$bn # these two are arguments for multiplication which
1880 # result is used in *next* step [which is why it's
1881 # commented as "forward multiplication" below];
1882 )=@_;
1883$code.=<<___;
1884 mflo $lo
1885 mfhi $hi
1886 $ADDU $c0,$lo
1887 sltu $at,$c0,$lo
1888 $MULTU $an,$bn # forward multiplication
1889 $ADDU $c0,$lo
1890 $ADDU $at,$hi
1891 sltu $lo,$c0,$lo
1892 $ADDU $c1,$at
1893 $ADDU $hi,$lo
1894___
# Cold variant: $c2 is initialised from the first carry out of $c1.
1895$code.=<<___ if (!$warm);
1896 sltu $c2,$c1,$at
1897 $ADDU $c1,$hi
1898 sltu $hi,$c1,$hi
1899 $ADDU $c2,$hi
1900___
# Warm variant: $c2 already holds carries, so both new carries are added.
1901$code.=<<___ if ($warm);
1902 sltu $at,$c1,$at
1903 $ADDU $c1,$hi
1904 $ADDU $c2,$at
1905 sltu $hi,$c1,$hi
1906 $ADDU $c2,$hi
1907___
1908}
1909
# bn_sqr_comba8: symbol, alignment and entry label.
1910$code.=<<___;
1911
1912.align 5
1913.globl bn_sqr_comba8
1914.ent bn_sqr_comba8
1915bn_sqr_comba8:
1916___
# NUBI flavour only: open a 6*$SZREG frame and save $ra, $t3..$t0 and $gp.
1917$code.=<<___ if ($flavour =~ /nubi/i);
1918 .frame $sp,6*$SZREG,$ra
1919 .mask 0x8000f008,-$SZREG
1920 .set noreorder
1921 $PTR_SUB $sp,6*$SZREG
1922 $REG_S $ra,5*$SZREG($sp)
1923 $REG_S $t3,4*$SZREG($sp)
1924 $REG_S $t2,3*$SZREG($sp)
1925 $REG_S $t1,2*$SZREG($sp)
1926 $REG_S $t0,1*$SZREG($sp)
1927 $REG_S $gp,0*$SZREG($sp)
1928___
# Load a[0..7] from ($a1), store r[0] = low word of a[0]^2, then handle the
# first doubled product a[0]*a[1] inline: the top bits of hi and lo are
# captured with slt against $zero before each half is shifted left by one
# ($c_1 receives the bit shifted out of hi; $a2 is used as scratch for the
# bit moved from lo into hi).  r[1] is stored at the end of this chunk.
1929$code.=<<___;
1930 .set reorder
1931 $LD $a_0,0($a1)
1932 $LD $a_1,$BNSZ($a1)
1933 $LD $a_2,2*$BNSZ($a1)
1934 $LD $a_3,3*$BNSZ($a1)
1935
1936 $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3);
1937 $LD $a_4,4*$BNSZ($a1)
1938 $LD $a_5,5*$BNSZ($a1)
1939 $LD $a_6,6*$BNSZ($a1)
1940 $LD $a_7,7*$BNSZ($a1)
1941 mflo $c_1
1942 mfhi $c_2
1943 $ST $c_1,0($a0)
1944
1945 $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1);
1946 mflo $t_1
1947 mfhi $t_2
1948 slt $c_1,$t_2,$zero
1949 $SLL $t_2,1
1950 $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2);
1951 slt $a2,$t_1,$zero
1952 $ADDU $t_2,$a2
1953 $SLL $t_1,1
1954 $ADDU $c_2,$t_1
1955 sltu $at,$c_2,$t_1
1956 $ADDU $c_3,$t_2,$at
1957 $ST $c_2,$BNSZ($a0)
1958___
# From here on the pattern is: each &add_c2 call folds in (doubled) the
# product started by the previous step and starts the product named in its
# trailing comment; the plain heredoc chunks fold in a square term once
# (no doubling), start the next cross product and store a finished r[] word.
1959 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
1960 $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
1961$code.=<<___;
1962 mflo $t_1
1963 mfhi $t_2
1964 $ADDU $c_3,$t_1
1965 sltu $at,$c_3,$t_1
1966 $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3);
1967 $ADDU $t_2,$at
1968 $ADDU $c_1,$t_2
1969 sltu $at,$c_1,$t_2
1970 $ADDU $c_2,$at
1971 $ST $c_3,2*$BNSZ($a0)
1972___
1973 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
1974 $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3);
1975 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
1976 $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1);
1977$code.=<<___;
1978 $ST $c_1,3*$BNSZ($a0)
1979___
1980 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
1981 $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1);
1982 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
1983 $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
1984$code.=<<___;
1985 mflo $t_1
1986 mfhi $t_2
1987 $ADDU $c_2,$t_1
1988 sltu $at,$c_2,$t_1
1989 $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2);
1990 $ADDU $t_2,$at
1991 $ADDU $c_3,$t_2
1992 sltu $at,$c_3,$t_2
1993 $ADDU $c_1,$at
1994 $ST $c_2,4*$BNSZ($a0)
1995___
1996 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
1997 $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2);
1998 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
1999 $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2);
2000 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2001 $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3);
2002$code.=<<___;
2003 $ST $c_3,5*$BNSZ($a0)
2004___
2005 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2006 $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3);
2007 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2008 $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3);
2009 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2010 $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
2011$code.=<<___;
2012 mflo $t_1
2013 mfhi $t_2
2014 $ADDU $c_1,$t_1
2015 sltu $at,$c_1,$t_1
2016 $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1);
2017 $ADDU $t_2,$at
2018 $ADDU $c_2,$t_2
2019 sltu $at,$c_2,$t_2
2020 $ADDU $c_3,$at
2021 $ST $c_1,6*$BNSZ($a0)
2022___
2023 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2024 $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1);
2025 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2026 $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1);
2027 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2028 $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1);
2029 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2030 $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2);
2031$code.=<<___;
2032 $ST $c_2,7*$BNSZ($a0)
2033___
2034 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2035 $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2);
2036 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2037 $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2);
2038 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2039 $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2);
2040$code.=<<___;
2041 mflo $t_1
2042 mfhi $t_2
2043 $ADDU $c_3,$t_1
2044 sltu $at,$c_3,$t_1
2045 $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3);
2046 $ADDU $t_2,$at
2047 $ADDU $c_1,$t_2
2048 sltu $at,$c_1,$t_2
2049 $ADDU $c_2,$at
2050 $ST $c_3,8*$BNSZ($a0)
2051___
2052 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2053 $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3);
2054 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2055 $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3);
2056 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2057 $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1);
2058$code.=<<___;
2059 $ST $c_1,9*$BNSZ($a0)
2060___
2061 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2062 $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1);
2063 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2064 $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1);
2065$code.=<<___;
2066 mflo $t_1
2067 mfhi $t_2
2068 $ADDU $c_2,$t_1
2069 sltu $at,$c_2,$t_1
2070 $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2);
2071 $ADDU $t_2,$at
2072 $ADDU $c_3,$t_2
2073 sltu $at,$c_3,$t_2
2074 $ADDU $c_1,$at
2075 $ST $c_2,10*$BNSZ($a0)
2076___
2077 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2078 $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2);
2079 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2080 $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3);
2081$code.=<<___;
2082 $ST $c_3,11*$BNSZ($a0)
2083___
2084 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2085 $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3);
2086$code.=<<___;
2087 mflo $t_1
2088 mfhi $t_2
2089 $ADDU $c_1,$t_1
2090 sltu $at,$c_1,$t_1
2091 $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1);
2092 $ADDU $t_2,$at
2093 $ADDU $c_2,$t_2
2094 sltu $at,$c_2,$t_2
2095 $ADDU $c_3,$at
2096 $ST $c_1,12*$BNSZ($a0)
2097___
2098 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2099 $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2);
# Final chunk: store r[13], fold in a[7]^2 once and store r[14], r[15].
2100$code.=<<___;
2101 $ST $c_2,13*$BNSZ($a0)
2102
2103 mflo $t_1
2104 mfhi $t_2
2105 $ADDU $c_3,$t_1
2106 sltu $at,$c_3,$t_1
2107 $ADDU $t_2,$at
2108 $ADDU $c_1,$t_2
2109 $ST $c_3,14*$BNSZ($a0)
2110 $ST $c_1,15*$BNSZ($a0)
2111
2112 .set noreorder
2113___
# NUBI flavour only: reload $t3..$t0 and $gp and release the frame.
2114$code.=<<___ if ($flavour =~ /nubi/i);
2115 $REG_L $t3,4*$SZREG($sp)
2116 $REG_L $t2,3*$SZREG($sp)
2117 $REG_L $t1,2*$SZREG($sp)
2118 $REG_L $t0,1*$SZREG($sp)
2119 $REG_L $gp,0*$SZREG($sp)
2120 $PTR_ADD $sp,6*$SZREG
2121___
# Return from bn_sqr_comba8 (jr/nop, .end) and open bn_sqr_comba4.
2122$code.=<<___;
2123 jr $ra
2124 nop
2125.end bn_sqr_comba8
2126
2127.align 5
2128.globl bn_sqr_comba4
2129.ent bn_sqr_comba4
2130bn_sqr_comba4:
2131___
# NUBI flavour only: open a 6*$SZREG frame and save $ra, $t3..$t0 and $gp.
2132$code.=<<___ if ($flavour =~ /nubi/i);
2133 .frame $sp,6*$SZREG,$ra
2134 .mask 0x8000f008,-$SZREG
2135 .set noreorder
2136 $PTR_SUB $sp,6*$SZREG
2137 $REG_S $ra,5*$SZREG($sp)
2138 $REG_S $t3,4*$SZREG($sp)
2139 $REG_S $t2,3*$SZREG($sp)
2140 $REG_S $t1,2*$SZREG($sp)
2141 $REG_S $t0,1*$SZREG($sp)
2142 $REG_S $gp,0*$SZREG($sp)
2143___
# Load a[0..3] from ($a1), store r[0], then double the first cross product
# a[0]*a[1] inline (slt against $zero captures the bits shifted out; $a2 is
# used as scratch) and store r[1].
2144$code.=<<___;
2145 .set reorder
2146 $LD $a_0,0($a1)
2147 $LD $a_1,$BNSZ($a1)
2148 $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3);
2149 $LD $a_2,2*$BNSZ($a1)
2150 $LD $a_3,3*$BNSZ($a1)
2151 mflo $c_1
2152 mfhi $c_2
2153 $ST $c_1,0($a0)
2154
2155 $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1);
2156 mflo $t_1
2157 mfhi $t_2
2158 slt $c_1,$t_2,$zero
2159 $SLL $t_2,1
2160 $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2);
2161 slt $a2,$t_1,$zero
2162 $ADDU $t_2,$a2
2163 $SLL $t_1,1
2164 $ADDU $c_2,$t_1
2165 sltu $at,$c_2,$t_1
2166 $ADDU $c_3,$t_2,$at
2167 $ST $c_2,$BNSZ($a0)
2168___
# Each &add_c2 call folds in (doubled) the product started by the previous
# step and starts the product named in its trailing comment; the heredoc
# chunks in between fold in a square term once and store a finished word.
2169 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2170 $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
2171$code.=<<___;
2172 mflo $t_1
2173 mfhi $t_2
2174 $ADDU $c_3,$t_1
2175 sltu $at,$c_3,$t_1
2176 $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3);
2177 $ADDU $t_2,$at
2178 $ADDU $c_1,$t_2
2179 sltu $at,$c_1,$t_2
2180 $ADDU $c_2,$at
2181 $ST $c_3,2*$BNSZ($a0)
2182___
2183 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2184 $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3);
2185 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2186 $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1);
2187$code.=<<___;
2188 $ST $c_1,3*$BNSZ($a0)
2189___
2190 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2191 $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
2192$code.=<<___;
2193 mflo $t_1
2194 mfhi $t_2
2195 $ADDU $c_2,$t_1
2196 sltu $at,$c_2,$t_1
2197 $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
2198 $ADDU $t_2,$at
2199 $ADDU $c_3,$t_2
2200 sltu $at,$c_3,$t_2
2201 $ADDU $c_1,$at
2202 $ST $c_2,4*$BNSZ($a0)
2203___
2204 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2205 $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
# Final chunk: store r[5], fold in a[3]^2 once and store r[6], r[7].
2206$code.=<<___;
2207 $ST $c_3,5*$BNSZ($a0)
2208
2209 mflo $t_1
2210 mfhi $t_2
2211 $ADDU $c_1,$t_1
2212 sltu $at,$c_1,$t_1
2213 $ADDU $t_2,$at
2214 $ADDU $c_2,$t_2
2215 $ST $c_1,6*$BNSZ($a0)
2216 $ST $c_2,7*$BNSZ($a0)
2217
2218 .set noreorder
2219___
# NUBI flavour only: reload $t3..$t0 and $gp and release the frame.
2220$code.=<<___ if ($flavour =~ /nubi/i);
2221 $REG_L $t3,4*$SZREG($sp)
2222 $REG_L $t2,3*$SZREG($sp)
2223 $REG_L $t1,2*$SZREG($sp)
2224 $REG_L $t0,1*$SZREG($sp)
2225 $REG_L $gp,0*$SZREG($sp)
2226 $PTR_ADD $sp,6*$SZREG
2227___
2228$code.=<<___;
2229 jr $ra
2230 nop
2231.end bn_sqr_comba4
2232___
# Emit the accumulated assembly text and close the output stream.
2233print $code;
2234close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl b/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl
deleted file mode 100644
index 8645d5adcc..0000000000
--- a/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl
+++ /dev/null
@@ -1,1393 +0,0 @@
1#!/usr/bin/env perl
2#
3# Copyright (c) 2010-2011 Intel Corp.
4# Author: Vinodh.Gopal@intel.com
5# Jim Guilford
6# Erdinc.Ozturk@intel.com
7# Maxim.Perminov@intel.com
8#
9# More information about algorithm used can be found at:
10# http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf
11#
12# ====================================================================
13# Copyright (c) 2011 The OpenSSL Project. All rights reserved.
14#
15# Redistribution and use in source and binary forms, with or without
16# modification, are permitted provided that the following conditions
17# are met:
18#
19# 1. Redistributions of source code must retain the above copyright
20# notice, this list of conditions and the following disclaimer.
21#
22# 2. Redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in
24# the documentation and/or other materials provided with the
25# distribution.
26#
27# 3. All advertising materials mentioning features or use of this
28# software must display the following acknowledgment:
29# "This product includes software developed by the OpenSSL Project
30# for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
31#
32# 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
33# endorse or promote products derived from this software without
34# prior written permission. For written permission, please contact
35# licensing@OpenSSL.org.
36#
37# 5. Products derived from this software may not be called "OpenSSL"
38# nor may "OpenSSL" appear in their names without prior written
39# permission of the OpenSSL Project.
40#
41# 6. Redistributions of any form whatsoever must retain the following
42# acknowledgment:
43# "This product includes software developed by the OpenSSL Project
44# for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
45#
46# THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
47# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
49# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
50# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
51# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
53# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
55# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
56# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
57# OF THE POSSIBILITY OF SUCH DAMAGE.
58# ====================================================================
59
# Command line: first argument is the flavour, second the output file.  If
# the first argument contains a dot it is taken to be the output file and
# the flavour is left undefined.
60$flavour = shift;
61$output = shift;
62if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
63
# Locate x86_64-xlate.pl, first next to this script and then under
# ../../perlasm relative to it; give up if neither exists.
64$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
65( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
66( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
67die "can't locate x86_64-xlate.pl";
68
# Everything printed to STDOUT is piped through the translator script.
# NOTE(review): the return value of open is not checked here.
69open OUT,"| \"$^X\" $xlate $flavour $output";
70*STDOUT=*OUT;
71
72use strict;
# $code accumulates the generated assembly text.
73my $code=".text\n\n";
# $m is a counter used by swizzle/unswizzle to give each emitted loop a
# distinct "loop_$m" label.
74my $m=0;
75
76#
77# Define x512 macros
78#
79
80#MULSTEP_512_ADD MACRO x7, x6, x5, x4, x3, x2, x1, x0, dst, src1, src2, add_src, tmp1, tmp2
81#
82# uses rax, rdx, and args
#
# Perl-level arguments (these differ from the MACRO line above):
#   $x    - reference to an array of 8 register names, copied locally as @X
#   $DST  - operand that receives the lowest result word
#   $SRC2 - base register of the 8-qword multiplicand
#   $ASRC - base register of the 8-qword addend
#   $OP   - the single qword multiplier
#   $TMP  - scratch register carrying the previous high half between steps
#
# Emitted code computes $OP * SRC2[0..7] + ASRC[0..7].  The lowest word goes
# to $DST, words 1..7 are left in $X[1]..$X[7], and the final high half
# (%rdx) is moved into $X[0].
83sub MULSTEP_512_ADD
84{
85 my ($x, $DST, $SRC2, $ASRC, $OP, $TMP)=@_;
86 my @X=@$x; # make a copy
# Word 0: load the addend into $X[0], add the low product half, store it.
87$code.=<<___;
88 mov (+8*0)($SRC2), %rax
89 mul $OP # rdx:rax = %OP * [0]
90 mov ($ASRC), $X[0]
91 add %rax, $X[0]
92 adc \$0, %rdx
93 mov $X[0], $DST
94___
# Words 1..7: addend + low product half + previous high half (saved in $TMP).
95for(my $i=1;$i<8;$i++) {
96$code.=<<___;
97 mov %rdx, $TMP
98
99 mov (+8*$i)($SRC2), %rax
100 mul $OP # rdx:rax = %OP * [$i]
101 mov (+8*$i)($ASRC), $X[$i]
102 add %rax, $X[$i]
103 adc \$0, %rdx
104 add $TMP, $X[$i]
105 adc \$0, %rdx
106___
107}
# The last high half becomes the top word, reusing the register of word 0.
108$code.=<<___;
109 mov %rdx, $X[0]
110___
111}
112
113#MULSTEP_512 MACRO x7, x6, x5, x4, x3, x2, x1, x0, dst, src2, src1_val, tmp
114#
115# uses rax, rdx, and args
#
# Perl-level arguments:
#   $x    - reference to an array of 8 register names, copied locally as @X;
#           on entry these registers hold the value being accumulated into
#   $DST  - operand that receives the lowest result word
#   $SRC2 - base register of the 8-qword multiplicand
#   $OP   - the single qword multiplier
#   $TMP  - scratch register carrying the previous high half between steps
#
# Emitted code adds $OP * SRC2[0..7] to the value already in $X[0..7].  The
# lowest word goes to $DST, words 1..7 stay in $X[1]..$X[7], and the final
# high half (%rdx) is moved into $X[0].
116sub MULSTEP_512
117{
118 my ($x, $DST, $SRC2, $OP, $TMP)=@_;
119 my @X=@$x; # make a copy
# Word 0: add the low product half to $X[0] and store it.
120$code.=<<___;
121 mov (+8*0)($SRC2), %rax
122 mul $OP # rdx:rax = %OP * [0]
123 add %rax, $X[0]
124 adc \$0, %rdx
125 mov $X[0], $DST
126___
# Words 1..7: add the low product half and the previous high half ($TMP).
127for(my $i=1;$i<8;$i++) {
128$code.=<<___;
129 mov %rdx, $TMP
130
131 mov (+8*$i)($SRC2), %rax
132 mul $OP # rdx:rax = %OP * [$i]
133 add %rax, $X[$i]
134 adc \$0, %rdx
135 add $TMP, $X[$i]
136 adc \$0, %rdx
137___
138}
# The last high half becomes the top word, reusing the register of word 0.
139$code.=<<___;
140 mov %rdx, $X[0]
141___
142}
143
144#
145# Swizzle Macros
146#
147
148# macro to copy data from flat space to swizzled table
149#MACRO swizzle pDst, pSrc, tmp1, tmp2
150# pDst and pSrc are modified
#
# Perl-level arguments: $pDst, $pSrc (pointer registers), $cnt (loop
# counter register) and $d0 (data register).
#
# The emitted loop runs 8 times.  Each pass reads one qword from ($pSrc) and
# writes it out as four pieces at $pDst+0, +64, +128 and +192, shifting $d0
# right by 16 between writes; then $pSrc advances by 8 and $pDst by 64*4.
# The "$d0#w" operand is presumably the translator's notation for the 16-bit
# sub-register of $d0 -- TODO confirm against x86_64-xlate.pl.
151sub swizzle
152{
153 my ($pDst, $pSrc, $cnt, $d0)=@_;
154$code.=<<___;
155 mov \$8, $cnt
156loop_$m:
157 mov ($pSrc), $d0
158 mov $d0#w, ($pDst)
159 shr \$16, $d0
160 mov $d0#w, (+64*1)($pDst)
161 shr \$16, $d0
162 mov $d0#w, (+64*2)($pDst)
163 shr \$16, $d0
164 mov $d0#w, (+64*3)($pDst)
165 lea 8($pSrc), $pSrc
166 lea 64*4($pDst), $pDst
167 dec $cnt
168 jnz loop_$m
169___
170
# Bump the label counter so the next expansion gets a fresh loop label.
171 $m++;
172}
173
174# macro to copy data from swizzled table to flat space
175#MACRO unswizzle pDst, pSrc, tmp*3
#
# Perl-level arguments: $pDst, $pSrc (pointer registers), $cnt (loop
# counter register) and $d0, $d1 (data registers).
#
# The emitted loop runs 4 times and rebuilds two qwords per pass: $d0 from
# the pieces at $pSrc + 64*3, 64*2, 64*1, 64*0 and $d1 from the same offsets
# plus 256.  The topmost piece is loaded with movzxw, and each lower piece is
# merged after a 16-bit left shift.  The two qwords are stored at $pDst+0
# and $pDst+8; then $pSrc advances by 256*2 and $pDst by 8*2.
# The "#w" suffix is presumably the translator's notation for a 16-bit
# sub-register -- TODO confirm against x86_64-xlate.pl.
176sub unswizzle
177{
178 my ($pDst, $pSrc, $cnt, $d0, $d1)=@_;
179$code.=<<___;
180 mov \$4, $cnt
181loop_$m:
182 movzxw (+64*3+256*0)($pSrc), $d0
183 movzxw (+64*3+256*1)($pSrc), $d1
184 shl \$16, $d0
185 shl \$16, $d1
186 mov (+64*2+256*0)($pSrc), $d0#w
187 mov (+64*2+256*1)($pSrc), $d1#w
188 shl \$16, $d0
189 shl \$16, $d1
190 mov (+64*1+256*0)($pSrc), $d0#w
191 mov (+64*1+256*1)($pSrc), $d1#w
192 shl \$16, $d0
193 shl \$16, $d1
194 mov (+64*0+256*0)($pSrc), $d0#w
195 mov (+64*0+256*1)($pSrc), $d1#w
196 mov $d0, (+8*0)($pDst)
197 mov $d1, (+8*1)($pDst)
198 lea 256*2($pSrc), $pSrc
199 lea 8*2($pDst), $pDst
200 sub \$1, $cnt
201 jnz loop_$m
202___
203
# Bump the label counter so the next expansion gets a fresh loop label.
204 $m++;
205}
206
207#
208# Data Structures
209#
210
211# Reduce Data
212#
213#
214# Offset Value
215# 0C0 Carries
216# 0B8 X2[10]
217# 0B0 X2[9]
218# 0A8 X2[8]
219# 0A0 X2[7]
220# 098 X2[6]
221# 090 X2[5]
222# 088 X2[4]
223# 080 X2[3]
224# 078 X2[2]
225# 070 X2[1]
226# 068 X2[0]
227# 060 X1[12] P[10]
228# 058 X1[11] P[9] Z[8]
229# 050 X1[10] P[8] Z[7]
230# 048 X1[9] P[7] Z[6]
231# 040 X1[8] P[6] Z[5]
232# 038 X1[7] P[5] Z[4]
233# 030 X1[6] P[4] Z[3]
234# 028 X1[5] P[3] Z[2]
235# 020 X1[4] P[2] Z[1]
236# 018 X1[3] P[1] Z[0]
237# 010 X1[2] P[0] Y[2]
238# 008 X1[1] Q[1] Y[1]
239# 000 X1[0] Q[0] Y[0]
240
# Byte offsets of the fields inside the "Reduce Data" scratch area.  X1, X2
# and Carries are laid out back to back; Q/P and Y/Z both start at offset 0
# and therefore share storage with X1.
241my $X1_offset = 0; # 13 qwords
242my $X2_offset = $X1_offset + 13*8; # 11 qwords
243my $Carries_offset = $X2_offset + 11*8; # 1 qword
244my $Q_offset = 0; # 2 qwords
245my $P_offset = $Q_offset + 2*8; # 11 qwords
246my $Y_offset = 0; # 3 qwords
247my $Z_offset = $Y_offset + 3*8; # 9 qwords
248
# Total size of the area: everything up to and including the Carries qword.
249my $Red_Data_Size = $Carries_offset + 1*8; # (25 qwords)
250
251#
252# Stack Frame
253#
254#
255# offset value
256# ... <old stack contents>
257# ...
258# 280 Garray
259
260# 278 tmp16[15]
261# ... ...
262# 200 tmp16[0]
263
264# 1F8 tmp[7]
265# ... ...
266# 1C0 tmp[0]
267
268# 1B8 GT[7]
269# ... ...
270# 180 GT[0]
271
272# 178 Reduce Data
273# ... ...
274# 0B8 Reduce Data
275# 0B0 reserved
276# 0A8 reserved
277# 0A0 reserved
278# 098 reserved
279# 090 reserved
280# 088 reduce result addr
281# 080 exp[8]
282
283# ...
284# 048 exp[1]
285# 040 exp[0]
286
287# 038 reserved
288# 030 loop_idx
289# 028 pg
290# 020 i
291# 018 pData ; arg 4
292# 010 pG ; arg 2
293# 008 pResult ; arg 1
294# 000 rsp ; stack pointer before subtract
295
# Byte offsets of the stack-frame slots.  Each offset is derived from the
# previous one by adding the size of the previous slot, so the multiplier on
# each line is the size in qwords of the slot defined on the line above it
# (e.g. 8*9 after exp for its 9 qwords, 8*5 after reserved2 for the five
# reserved qwords).
296my $rsp_offset = 0;
297my $pResult_offset = 8*1 + $rsp_offset;
298my $pG_offset = 8*1 + $pResult_offset;
299my $pData_offset = 8*1 + $pG_offset;
300my $i_offset = 8*1 + $pData_offset;
301my $pg_offset = 8*1 + $i_offset;
302my $loop_idx_offset = 8*1 + $pg_offset;
303my $reserved1_offset = 8*1 + $loop_idx_offset;
304my $exp_offset = 8*1 + $reserved1_offset;
305my $red_result_addr_offset= 8*9 + $exp_offset;
306my $reserved2_offset = 8*1 + $red_result_addr_offset;
307my $Reduce_Data_offset = 8*5 + $reserved2_offset;
308my $GT_offset = $Red_Data_Size + $Reduce_Data_offset;
309my $tmp_offset = 8*8 + $GT_offset;
310my $tmp16_offset = 8*8 + $tmp_offset;
311my $garray_offset = 8*16 + $tmp16_offset;
# Total frame size: garray occupies 8*8*32 bytes after its offset.
312my $mem_size = 8*8*32 + $garray_offset;
313
314#
315# Offsets within Reduce Data
316#
317#
318# struct MODF_2FOLD_MONT_512_C1_DATA {
319# UINT64 t[8][8];
320# UINT64 m[8];
321# UINT64 m1[8]; /* 2^768 % m */
322# UINT64 m2[8]; /* 2^640 % m */
323# UINT64 k1[2]; /* (- 1/m) % 2^128 */
324# };
325
# Byte offsets of the t, m, m1, m2 and k1 members of the
# MODF_2FOLD_MONT_512_C1_DATA structure sketched in the comment above.
326my $T = 0;
327my $M = 512; # = 8 * 8 * 8
328my $M1 = 576; # = 8 * 8 * 9 /* += 8 * 8 */
329my $M2 = 640; # = 8 * 8 * 10 /* += 8 * 8 */
330my $K1 = 704; # = 8 * 8 * 11 /* += 8 * 8 */
331
332#
333# FUNCTIONS
334#
335
336{{{
337#
338# MULADD_128x512 : Function to multiply 128-bits (2 qwords) by 512-bits (8 qwords)
339# and add 512-bits (8 qwords)
340# to get 640 bits (10 qwords)
341# Input: 128-bit mul source: [rdi+8*1], rbp
342# 512-bit mul source: [rsi+8*n]
343# 512-bit add source: r15, r14, ..., r9, r8
344# Output: r9, r8, r15, r14, r13, r12, r11, r10, [rcx+8*1], [rcx+8*0]
345# Clobbers all regs except: rcx, rsi, rdi
#
# Built from two MULSTEP_512 expansions.  The first uses the low multiplier
# qword already in %rbp with registers r8..r15 and stores the lowest word at
# (+8*0)(%rcx).  The high multiplier qword is then loaded from (+8*1)(%rdi)
# into %rbp, and the second expansion uses the register list rotated by one
# (r9..r15, r8) and stores its lowest word at (+8*1)(%rcx).  %rbx is the
# scratch register in both steps.
346$code.=<<___;
347.type MULADD_128x512,\@abi-omnipotent
348.align 16
349MULADD_128x512:
350 _CET_ENDBR
351___
352 &MULSTEP_512([map("%r$_",(8..15))], "(+8*0)(%rcx)", "%rsi", "%rbp", "%rbx");
353$code.=<<___;
354 mov (+8*1)(%rdi), %rbp
355___
356 &MULSTEP_512([map("%r$_",(9..15,8))], "(+8*1)(%rcx)", "%rsi", "%rbp", "%rbx");
357$code.=<<___;
358 ret
359.size MULADD_128x512,.-MULADD_128x512
360___
361}}}
362
363{{{
364#MULADD_256x512 MACRO pDst, pA, pB, OP, TMP, X7, X6, X5, X4, X3, X2, X1, X0
365#
366# Inputs: pDst: Destination (768 bits, 12 qwords)
367# pA: Multiplicand (1024 bits, 16 qwords)
368# pB: Multiplicand (512 bits, 8 qwords)
369# Dst = Ah * B + Al
370# where Ah is (in qwords) A[15:12] (256 bits) and Al is A[7:0] (512 bits)
371# Results in X3 X2 X1 X0 X7 X6 X5 X4 Dst[3:0]
372# Uses registers: arguments, RAX, RDX
#
# Perl-level arguments: $pDst, $pA, $pB (pointer registers), $OP (register
# that receives each multiplier qword), $TMP (scratch) and $X, a reference
# to the array of 8 accumulator register names.
#
# Four multiply steps are emitted, one per qword A[12]..A[15].  The first
# uses MULSTEP_512_ADD so that A[0..7] is added in; the other three use
# MULSTEP_512.  Step k stores its lowest word at (+8*k)($pDst).
#
# Side effect at generation time: after every step the array behind $X is
# rotated left by one element in place, so the caller's array ends up
# rotated by four positions.
373sub MULADD_256x512
374{
375 my ($pDst, $pA, $pB, $OP, $TMP, $X)=@_;
376$code.=<<___;
377 mov (+8*12)($pA), $OP
378___
379 &MULSTEP_512_ADD($X, "(+8*0)($pDst)", $pB, $pA, $OP, $TMP);
380 push(@$X,shift(@$X));
381
382$code.=<<___;
383 mov (+8*13)($pA), $OP
384___
385 &MULSTEP_512($X, "(+8*1)($pDst)", $pB, $OP, $TMP);
386 push(@$X,shift(@$X));
387
388$code.=<<___;
389 mov (+8*14)($pA), $OP
390___
391 &MULSTEP_512($X, "(+8*2)($pDst)", $pB, $OP, $TMP);
392 push(@$X,shift(@$X));
393
394$code.=<<___;
395 mov (+8*15)($pA), $OP
396___
397 &MULSTEP_512($X, "(+8*3)($pDst)", $pB, $OP, $TMP);
398 push(@$X,shift(@$X));
399}
400
401#
402# mont_reduce(UINT64 *x, /* 1024 bits, 16 qwords */
403# UINT64 *m, /* 512 bits, 8 qwords */
404# MODF_2FOLD_MONT_512_C1_DATA *data,
405# UINT64 *r) /* 512 bits, 8 qwords */
406# Input: x (number to be reduced): tmp16 (Implicit)
407# m (modulus): [pM] (Implicit)
408# data (reduce data): [pData] (Implicit)
409# Output: r (result): Address in [red_res_addr]
410# result also in: r9, r8, r15, r14, r13, r12, r11, r10
411
# Emit the mont_reduce routine.
#
# @X is the working list of accumulator register names (initially r8..r15).
# It is rotated at generation time, so a reference such as $X[4] inside the
# templates below names whichever register holds the wanted limb at that
# point of the emitted code.
412my @X=map("%r$_",(8..15));
413
414$code.=<<___;
415.type mont_reduce,\@abi-omnipotent
416.align 16
417mont_reduce:
418 _CET_ENDBR
419___
420
# $STACK_DEPTH is added to every %rsp-relative frame offset used below.
# NOTE(review): presumably the 8 bytes are the return address pushed by the
# call that reaches mont_reduce (mont_mul_a3b notes "stack depth is extra 8
# from call") - confirm.
421my $STACK_DEPTH = 8;
422 #
423 # X1 = Xh * M1 + Xl
424$code.=<<___;
425 lea (+$Reduce_Data_offset+$X1_offset+$STACK_DEPTH)(%rsp), %rdi # pX1 (Dst) 769 bits, 13 qwords
426 mov (+$pData_offset+$STACK_DEPTH)(%rsp), %rsi # pM1 (Bsrc) 512 bits, 8 qwords
427 add \$$M1, %rsi
428 lea (+$tmp16_offset+$STACK_DEPTH)(%rsp), %rcx # X (Asrc) 1024 bits, 16 qwords
429
430___
431
432 &MULADD_256x512("%rdi", "%rcx", "%rsi", "%rbp", "%rbx", \@X); # rotates @X 4 times
433 # results in r11, r10, r9, r8, r15, r14, r13, r12, X1[3:0]
# After the four rotations @X reads r12..r15, r8..r11, so $X[4]..$X[7] in the
# next template are r8..r11.
#
# NOTE(review): two remarks inside the template below look stale and are left
# untouched because they are part of the emitted text: "tmp16[3:0]" in the X1
# summary (MULADD_256x512 was given %rdi, i.e. pX1, as its destination), and
# "rsi -> Data (and points to T)" on an instruction that loads %rcx.
434
435$code.=<<___;
436 xor %rax, %rax
437 # X1 += xl
438 add (+8*8)(%rcx), $X[4]
439 adc (+8*9)(%rcx), $X[5]
440 adc (+8*10)(%rcx), $X[6]
441 adc (+8*11)(%rcx), $X[7]
442 adc \$0, %rax
443 # X1 is now rax, r11-r8, r15-r12, tmp16[3:0]
444
445 #
446 # check for carry ;; carry stored in rax
447 mov $X[4], (+8*8)(%rdi) # rdi points to X1
448 mov $X[5], (+8*9)(%rdi)
449 mov $X[6], %rbp
450 mov $X[7], (+8*11)(%rdi)
451
452 mov %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp)
453
454 mov (+8*0)(%rdi), $X[4]
455 mov (+8*1)(%rdi), $X[5]
456 mov (+8*2)(%rdi), $X[6]
457 mov (+8*3)(%rdi), $X[7]
458
459 # X1 is now stored in: X1[11], rbp, X1[9:8], r15-r8
460 # rdi -> X1
461 # rsi -> M1
462
463 #
464 # X2 = Xh * M2 + Xl
465 # do first part (X2 = Xh * M2)
466 add \$8*10, %rdi # rdi -> pXh ; 128 bits, 2 qwords
467 # Xh is actually { [rdi+8*1], rbp }
468 add \$`$M2-$M1`, %rsi # rsi -> M2
469 lea (+$Reduce_Data_offset+$X2_offset+$STACK_DEPTH)(%rsp), %rcx # rcx -> pX2 ; 641 bits, 11 qwords
470___
# Rotate @X right by two positions: it now reads r10..r15, r8, r9. From here
# on $X[0] is r10 and $X[6]/$X[7] are r8/r9, which matches the result layout
# documented for MULADD_128x512 (r9, r8, r15, ..., r10).
471 unshift(@X,pop(@X)); unshift(@X,pop(@X));
472$code.=<<___;
473
474 call MULADD_128x512 # args in rcx, rdi / rbp, rsi, r15-r8
475 # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0]
476 mov (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rax
477
478 # X2 += Xl
479 add (+8*8-8*10)(%rdi), $X[6] # (-8*10) is to adjust rdi -> Xh to Xl
480 adc (+8*9-8*10)(%rdi), $X[7]
481 mov $X[6], (+8*8)(%rcx)
482 mov $X[7], (+8*9)(%rcx)
483
484 adc %rax, %rax
485 mov %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp)
486
487 lea (+$Reduce_Data_offset+$Q_offset+$STACK_DEPTH)(%rsp), %rdi # rdi -> pQ ; 128 bits, 2 qwords
488 add \$`$K1-$M2`, %rsi # rsi -> pK1 ; 128 bits, 2 qwords
489
490 # MUL_128x128t128 rdi, rcx, rsi ; Q = X2 * K1 (bottom half)
491 # B1:B0 = rsi[1:0] = K1[1:0]
492 # A1:A0 = rcx[1:0] = X2[1:0]
493 # Result = rdi[1],rbp = Q[1],rbp
494 mov (%rsi), %r8 # B0
495 mov (+8*1)(%rsi), %rbx # B1
496
497 mov (%rcx), %rax # A0
498 mul %r8 # B0
499 mov %rax, %rbp
500 mov %rdx, %r9
501
502 mov (+8*1)(%rcx), %rax # A1
503 mul %r8 # B0
504 add %rax, %r9
505
506 mov (%rcx), %rax # A0
507 mul %rbx # B1
508 add %rax, %r9
509
510 mov %r9, (+8*1)(%rdi)
511 # end MUL_128x128t128
512
513 sub \$`$K1-$M`, %rsi
514
515 mov (%rcx), $X[6]
516 mov (+8*1)(%rcx), $X[7] # r9:r8 = X2[1:0]
517
518 call MULADD_128x512 # args in rcx, rdi / rbp, rsi, r15-r8
519 # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0]
520
521 # load first half of m to rdx, rdi, rbx, rax
522 # moved this here for efficiency
523 mov (+8*0)(%rsi), %rax
524 mov (+8*1)(%rsi), %rbx
525 mov (+8*2)(%rsi), %rdi
526 mov (+8*3)(%rsi), %rdx
527
528 # continue with reduction
529 mov (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rbp
530
531 add (+8*8)(%rcx), $X[6]
532 adc (+8*9)(%rcx), $X[7]
533
534 #accumulate the final carry to rbp
535 adc %rbp, %rbp
536
537 # Add in overflow corrections: R = (X2>>128) += T[overflow]
538 # R = {r9, r8, r15, r14, ..., r10}
539 shl \$3, %rbp
540 mov (+$pData_offset+$STACK_DEPTH)(%rsp), %rcx # rsi -> Data (and points to T)
541 add %rcx, %rbp # pT ; 512 bits, 8 qwords, spread out
542
543 # rsi will be used to generate a mask after the addition
544 xor %rsi, %rsi
545
546 add (+8*8*0)(%rbp), $X[0]
547 adc (+8*8*1)(%rbp), $X[1]
548 adc (+8*8*2)(%rbp), $X[2]
549 adc (+8*8*3)(%rbp), $X[3]
550 adc (+8*8*4)(%rbp), $X[4]
551 adc (+8*8*5)(%rbp), $X[5]
552 adc (+8*8*6)(%rbp), $X[6]
553 adc (+8*8*7)(%rbp), $X[7]
554
555 # if there is a carry: rsi = 0xFFFFFFFFFFFFFFFF
556 # if carry is clear: rsi = 0x0000000000000000
557 sbb \$0, %rsi
558
559 # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m
560 and %rsi, %rax
561 and %rsi, %rbx
562 and %rsi, %rdi
563 and %rsi, %rdx
564
565 mov \$1, %rbp
566 sub %rax, $X[0]
567 sbb %rbx, $X[1]
568 sbb %rdi, $X[2]
569 sbb %rdx, $X[3]
570
571 # if there is a borrow: rbp = 0
572 # if there is no borrow: rbp = 1
573 # this is used to save the borrows in between the first half and the 2nd half of the subtraction of m
574 sbb \$0, %rbp
575
576 #load second half of m to rdx, rdi, rbx, rax
577
578 add \$$M, %rcx
579 mov (+8*4)(%rcx), %rax
580 mov (+8*5)(%rcx), %rbx
581 mov (+8*6)(%rcx), %rdi
582 mov (+8*7)(%rcx), %rdx
583
584 # use the rsi mask as before
585 # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m
586 and %rsi, %rax
587 and %rsi, %rbx
588 and %rsi, %rdi
589 and %rsi, %rdx
590
591 # if rbp = 0, there was a borrow before, it is moved to the carry flag
592 # if rbp = 1, there was not a borrow before, carry flag is cleared
593 sub \$1, %rbp
594
595 sbb %rax, $X[4]
596 sbb %rbx, $X[5]
597 sbb %rdi, $X[6]
598 sbb %rdx, $X[7]
599
600 # write R back to memory
601
602 mov (+$red_result_addr_offset+$STACK_DEPTH)(%rsp), %rsi
603 mov $X[0], (+8*0)(%rsi)
604 mov $X[1], (+8*1)(%rsi)
605 mov $X[2], (+8*2)(%rsi)
606 mov $X[3], (+8*3)(%rsi)
607 mov $X[4], (+8*4)(%rsi)
608 mov $X[5], (+8*5)(%rsi)
609 mov $X[6], (+8*6)(%rsi)
610 mov $X[7], (+8*7)(%rsi)
611
612 ret
613.size mont_reduce,.-mont_reduce
614___
615}}}
616
617{{{
618#MUL_512x512 MACRO pDst, pA, pB, x7, x6, x5, x4, x3, x2, x1, x0, tmp*2
619#
620# Inputs: pDst: Destination (1024 bits, 16 qwords)
621# pA: Multiplicand (512 bits, 8 qwords)
622# pB: Multiplicand (512 bits, 8 qwords)
623# Uses registers rax, rdx, args
624# B operand in [pB] and also in x7...x0
sub MUL_512x512
{
	# Emit a 512x512 -> 1024 bit schoolbook multiply.
	#
	#   $dst_spec  destination as "base" or "base+offset", for example
	#              "%rsp+$tmp16_offset+8"; it is split at the first '+' into
	#              the base register ($pDst) and a displacement expression
	#              ($pDst_o) that is spliced into every store
	#   $pA        register pointing at operand A (8 qwords)
	#   $pB        register pointing at operand B (8 qwords)
	#   $x         reference to the list of eight registers that hold B on
	#              entry; only a copy is rotated, the caller's list is not
	#              modified
	#   $OP        register that receives the current qword of A
	#   $TMP       scratch register handed on to MULSTEP_512
	#
	# The raw destination spec is unpacked under its own name so that $pDst
	# and $pDst_o are each declared exactly once. Arguments past the sixth
	# are ignored, as they effectively were before.
	my ($dst_spec, $pA, $pB, $x, $OP, $TMP)=@_;
	my ($pDst, $pDst_o) = ($dst_spec =~ m/([^+]*)\+?(.*)?/);
	my @X=@$x;	# make a copy

	# first row: A[0] * B[0..7], product limbs replace B in the registers
$code.=<<___;
 mov (+8*0)($pA), $OP

 mov $X[0], %rax
 mul $OP # rdx:rax = %OP * [0]
 mov %rax, (+$pDst_o+8*0)($pDst)
 mov %rdx, $X[0]
___
for(my $i=1;$i<8;$i++) {
$code.=<<___;
 mov $X[$i], %rax
 mul $OP # rdx:rax = %OP * [$i]
 add %rax, $X[$i-1]
 adc \$0, %rdx
 mov %rdx, $X[$i]
___
}

	# remaining rows: one MULSTEP_512 per qword of A; the register list is
	# rotated by one after each row
for(my $i=1;$i<8;$i++) {
$code.=<<___;
 mov (+8*$i)($pA), $OP
___

	&MULSTEP_512(\@X, "(+$pDst_o+8*$i)($pDst)", $pB, $OP, $TMP);
	push(@X,shift(@X));
}

	# spill the eight registers into destination qwords 8..15
$code.=<<___;
 mov $X[0], (+$pDst_o+8*8)($pDst)
 mov $X[1], (+$pDst_o+8*9)($pDst)
 mov $X[2], (+$pDst_o+8*10)($pDst)
 mov $X[3], (+$pDst_o+8*11)($pDst)
 mov $X[4], (+$pDst_o+8*12)($pDst)
 mov $X[5], (+$pDst_o+8*13)($pDst)
 mov $X[6], (+$pDst_o+8*14)($pDst)
 mov $X[7], (+$pDst_o+8*15)($pDst)
___
}
669
670#
671# mont_mul_a3b : subroutine to compute (Src1 * Src2) % M (all 512-bits)
672# Input: src1: Address of source 1: rdi
673# src2: Address of source 2: rsi
674# Output: dst: Address of destination: [red_res_addr]
675# src2 and result also in: r9, r8, r15, r14, r13, r12, r11, r10
676# Temp: Clobbers [tmp16], all registers
# Emit mont_mul_a3b: the inlined 512x512 multiply into tmp16, followed by a
# jump into mont_reduce, which performs the reduction and the final ret.
677$code.=<<___;
678.type mont_mul_a3b,\@abi-omnipotent
679.align 16
680mont_mul_a3b:
681 _CET_ENDBR
682 #
683 # multiply tmp = src1 * src2
684 # For multiply: dst = rcx, src1 = rdi, src2 = rsi
685 # stack depth is extra 8 from call
686___
# The destination spec is "base+offset": MUL_512x512 splits it at the first
# '+' and uses "$tmp16_offset+8" as the displacement (the +8 is the extra
# stack depth mentioned in the template above). The register list
# r10..r15, r8, r9 is the order in which src2 is held on entry (see the
# routine header: "src2 and result also in: r9, r8, r15, ..., r10").
687 &MUL_512x512("%rsp+$tmp16_offset+8", "%rdi", "%rsi", [map("%r$_",(10..15,8..9))], "%rbp", "%rbx");
688$code.=<<___;
689 #
690 # Dst = tmp % m
691 # Call reduce(tmp, m, data, dst)
692
693 # tail recursion optimization: jmp to mont_reduce and return from there
694 jmp mont_reduce
695 # call mont_reduce
696 # ret
697.size mont_mul_a3b,.-mont_mul_a3b
698___
699}}}
700
701{{{
702#SQR_512 MACRO pDest, pA, x7, x6, x5, x4, x3, x2, x1, x0, tmp*4
703#
704# Input in memory [pA] and also in x7...x0
705# Uses all argument registers plus rax and rdx
706#
707# This version computes all of the off-diagonal terms into memory,
708# and then it adds in the diagonal terms
709
sub SQR_512
{
	# Emit a 512-bit squaring that writes 16 qwords to the destination.
	#
	#   $dst_spec  destination as "base" or "base+offset", for example
	#              "%rsp+$tmp16_offset+8"; it is split at the first '+' into
	#              the base register ($pDst) and a displacement expression
	#              ($pDst_o) that is spliced into every load and store
	#   $pA        register pointing at the input (8 qwords)
	#   $x         reference to the list of eight registers that hold the
	#              input on entry; a private copy is used
	#   $A         register holding the multiplier of the current pass
	#   $tmp       scratch register
	#   $x7, $x6   two further scratch registers
	#
	# The off-diagonal products are accumulated into the destination first;
	# the finalize part then doubles them and adds the squares of the
	# individual qwords.
	#
	# The raw destination spec is unpacked under its own name so that $pDst
	# and $pDst_o are each declared exactly once. Arguments past the seventh
	# are ignored, as they effectively were before.
	my ($dst_spec, $pA, $x, $A, $tmp, $x7, $x6)=@_;
	my ($pDst, $pDst_o) = ($dst_spec =~ m/([^+]*)\+?(.*)?/);
	my @X=@$x;	# make a copy
$code.=<<___;
 # ------------------
 # first pass 01...07
 # ------------------
 mov $X[0], $A

 mov $X[1],%rax
 mul $A
 mov %rax, (+$pDst_o+8*1)($pDst)
___
for(my $i=2;$i<8;$i++) {
$code.=<<___;
 mov %rdx, $X[$i-2]
 mov $X[$i],%rax
 mul $A
 add %rax, $X[$i-2]
 adc \$0, %rdx
___
}
$code.=<<___;
 mov %rdx, $x7

 mov $X[0], (+$pDst_o+8*2)($pDst)

 # ------------------
 # second pass 12...17
 # ------------------

 mov (+8*1)($pA), $A

 mov (+8*2)($pA),%rax
 mul $A
 add %rax, $X[1]
 adc \$0, %rdx
 mov $X[1], (+$pDst_o+8*3)($pDst)

 mov %rdx, $X[0]
 mov (+8*3)($pA),%rax
 mul $A
 add %rax, $X[2]
 adc \$0, %rdx
 add $X[0], $X[2]
 adc \$0, %rdx
 mov $X[2], (+$pDst_o+8*4)($pDst)

 mov %rdx, $X[0]
 mov (+8*4)($pA),%rax
 mul $A
 add %rax, $X[3]
 adc \$0, %rdx
 add $X[0], $X[3]
 adc \$0, %rdx

 mov %rdx, $X[0]
 mov (+8*5)($pA),%rax
 mul $A
 add %rax, $X[4]
 adc \$0, %rdx
 add $X[0], $X[4]
 adc \$0, %rdx

 mov %rdx, $X[0]
 mov $X[6],%rax
 mul $A
 add %rax, $X[5]
 adc \$0, %rdx
 add $X[0], $X[5]
 adc \$0, %rdx

 mov %rdx, $X[0]
 mov $X[7],%rax
 mul $A
 add %rax, $x7
 adc \$0, %rdx
 add $X[0], $x7
 adc \$0, %rdx

 mov %rdx, $X[1]

 # ------------------
 # third pass 23...27
 # ------------------
 mov (+8*2)($pA), $A

 mov (+8*3)($pA),%rax
 mul $A
 add %rax, $X[3]
 adc \$0, %rdx
 mov $X[3], (+$pDst_o+8*5)($pDst)

 mov %rdx, $X[0]
 mov (+8*4)($pA),%rax
 mul $A
 add %rax, $X[4]
 adc \$0, %rdx
 add $X[0], $X[4]
 adc \$0, %rdx
 mov $X[4], (+$pDst_o+8*6)($pDst)

 mov %rdx, $X[0]
 mov (+8*5)($pA),%rax
 mul $A
 add %rax, $X[5]
 adc \$0, %rdx
 add $X[0], $X[5]
 adc \$0, %rdx

 mov %rdx, $X[0]
 mov $X[6],%rax
 mul $A
 add %rax, $x7
 adc \$0, %rdx
 add $X[0], $x7
 adc \$0, %rdx

 mov %rdx, $X[0]
 mov $X[7],%rax
 mul $A
 add %rax, $X[1]
 adc \$0, %rdx
 add $X[0], $X[1]
 adc \$0, %rdx

 mov %rdx, $X[2]

 # ------------------
 # fourth pass 34...37
 # ------------------

 mov (+8*3)($pA), $A

 mov (+8*4)($pA),%rax
 mul $A
 add %rax, $X[5]
 adc \$0, %rdx
 mov $X[5], (+$pDst_o+8*7)($pDst)

 mov %rdx, $X[0]
 mov (+8*5)($pA),%rax
 mul $A
 add %rax, $x7
 adc \$0, %rdx
 add $X[0], $x7
 adc \$0, %rdx
 mov $x7, (+$pDst_o+8*8)($pDst)

 mov %rdx, $X[0]
 mov $X[6],%rax
 mul $A
 add %rax, $X[1]
 adc \$0, %rdx
 add $X[0], $X[1]
 adc \$0, %rdx

 mov %rdx, $X[0]
 mov $X[7],%rax
 mul $A
 add %rax, $X[2]
 adc \$0, %rdx
 add $X[0], $X[2]
 adc \$0, %rdx

 mov %rdx, $X[5]

 # ------------------
 # fifth pass 45...47
 # ------------------
 mov (+8*4)($pA), $A

 mov (+8*5)($pA),%rax
 mul $A
 add %rax, $X[1]
 adc \$0, %rdx
 mov $X[1], (+$pDst_o+8*9)($pDst)

 mov %rdx, $X[0]
 mov $X[6],%rax
 mul $A
 add %rax, $X[2]
 adc \$0, %rdx
 add $X[0], $X[2]
 adc \$0, %rdx
 mov $X[2], (+$pDst_o+8*10)($pDst)

 mov %rdx, $X[0]
 mov $X[7],%rax
 mul $A
 add %rax, $X[5]
 adc \$0, %rdx
 add $X[0], $X[5]
 adc \$0, %rdx

 mov %rdx, $X[1]

 # ------------------
 # sixth pass 56...57
 # ------------------
 mov (+8*5)($pA), $A

 mov $X[6],%rax
 mul $A
 add %rax, $X[5]
 adc \$0, %rdx
 mov $X[5], (+$pDst_o+8*11)($pDst)

 mov %rdx, $X[0]
 mov $X[7],%rax
 mul $A
 add %rax, $X[1]
 adc \$0, %rdx
 add $X[0], $X[1]
 adc \$0, %rdx
 mov $X[1], (+$pDst_o+8*12)($pDst)

 mov %rdx, $X[2]

 # ------------------
 # seventh pass 67
 # ------------------
 mov $X[6], $A

 mov $X[7],%rax
 mul $A
 add %rax, $X[2]
 adc \$0, %rdx
 mov $X[2], (+$pDst_o+8*13)($pDst)

 mov %rdx, (+$pDst_o+8*14)($pDst)

 # start finalize (add in squares, and double off-terms)
 mov (+$pDst_o+8*1)($pDst), $X[0]
 mov (+$pDst_o+8*2)($pDst), $X[1]
 mov (+$pDst_o+8*3)($pDst), $X[2]
 mov (+$pDst_o+8*4)($pDst), $X[3]
 mov (+$pDst_o+8*5)($pDst), $X[4]
 mov (+$pDst_o+8*6)($pDst), $X[5]

 mov (+8*3)($pA), %rax
 mul %rax
 mov %rax, $x6
 mov %rdx, $X[6]

 add $X[0], $X[0]
 adc $X[1], $X[1]
 adc $X[2], $X[2]
 adc $X[3], $X[3]
 adc $X[4], $X[4]
 adc $X[5], $X[5]
 adc \$0, $X[6]

 mov (+8*0)($pA), %rax
 mul %rax
 mov %rax, (+$pDst_o+8*0)($pDst)
 mov %rdx, $A

 mov (+8*1)($pA), %rax
 mul %rax

 add $A, $X[0]
 adc %rax, $X[1]
 adc \$0, %rdx

 mov %rdx, $A
 mov $X[0], (+$pDst_o+8*1)($pDst)
 mov $X[1], (+$pDst_o+8*2)($pDst)

 mov (+8*2)($pA), %rax
 mul %rax

 add $A, $X[2]
 adc %rax, $X[3]
 adc \$0, %rdx

 mov %rdx, $A

 mov $X[2], (+$pDst_o+8*3)($pDst)
 mov $X[3], (+$pDst_o+8*4)($pDst)

 xor $tmp, $tmp
 add $A, $X[4]
 adc $x6, $X[5]
 adc \$0, $tmp

 mov $X[4], (+$pDst_o+8*5)($pDst)
 mov $X[5], (+$pDst_o+8*6)($pDst)

 # %%tmp has 0/1 in column 7
 # %%A6 has a full value in column 7

 mov (+$pDst_o+8*7)($pDst), $X[0]
 mov (+$pDst_o+8*8)($pDst), $X[1]
 mov (+$pDst_o+8*9)($pDst), $X[2]
 mov (+$pDst_o+8*10)($pDst), $X[3]
 mov (+$pDst_o+8*11)($pDst), $X[4]
 mov (+$pDst_o+8*12)($pDst), $X[5]
 mov (+$pDst_o+8*13)($pDst), $x6
 mov (+$pDst_o+8*14)($pDst), $x7

 mov $X[7], %rax
 mul %rax
 mov %rax, $X[7]
 mov %rdx, $A

 add $X[0], $X[0]
 adc $X[1], $X[1]
 adc $X[2], $X[2]
 adc $X[3], $X[3]
 adc $X[4], $X[4]
 adc $X[5], $X[5]
 adc $x6, $x6
 adc $x7, $x7
 adc \$0, $A

 add $tmp, $X[0]

 mov (+8*4)($pA), %rax
 mul %rax

 add $X[6], $X[0]
 adc %rax, $X[1]
 adc \$0, %rdx

 mov %rdx, $tmp

 mov $X[0], (+$pDst_o+8*7)($pDst)
 mov $X[1], (+$pDst_o+8*8)($pDst)

 mov (+8*5)($pA), %rax
 mul %rax

 add $tmp, $X[2]
 adc %rax, $X[3]
 adc \$0, %rdx

 mov %rdx, $tmp

 mov $X[2], (+$pDst_o+8*9)($pDst)
 mov $X[3], (+$pDst_o+8*10)($pDst)

 mov (+8*6)($pA), %rax
 mul %rax

 add $tmp, $X[4]
 adc %rax, $X[5]
 adc \$0, %rdx

 mov $X[4], (+$pDst_o+8*11)($pDst)
 mov $X[5], (+$pDst_o+8*12)($pDst)

 add %rdx, $x6
 adc $X[7], $x7
 adc \$0, $A

 mov $x6, (+$pDst_o+8*13)($pDst)
 mov $x7, (+$pDst_o+8*14)($pDst)
 mov $A, (+$pDst_o+8*15)($pDst)
___
}
1073
1074#
1075# sqr_reduce: subroutine to compute Result = reduce(Result * Result)
1076#
1077# input and result also in: r9, r8, r15, r14, r13, r12, r11, r10
1078#
# Emit sqr_reduce: load the result pointer, square inline into tmp16, then
# jump into mont_reduce, which performs the reduction and the final ret.
1079$code.=<<___;
1080.type sqr_reduce,\@abi-omnipotent
1081.align 16
1082sqr_reduce:
1083 _CET_ENDBR
1084 mov (+$pResult_offset+8)(%rsp), %rcx
1085___
# SQR_512 arguments, in order: destination spec (tmp16 on the stack; the
# displacement carries the same +8 as the pResult load above), input pointer
# (%rcx), the eight registers holding the input (r10..r15, r8, r9), then the
# scratch registers $A = %rbx, $tmp = %rbp, $x7 = %rsi and $x6 = %rdi.
1086 &SQR_512("%rsp+$tmp16_offset+8", "%rcx", [map("%r$_",(10..15,8..9))], "%rbx", "%rbp", "%rsi", "%rdi");
1087$code.=<<___;
1088 # tail recursion optimization: jmp to mont_reduce and return from there
1089 jmp mont_reduce
1090 # call mont_reduce
1091 # ret
1092.size sqr_reduce,.-sqr_reduce
1093___
1094}}}
1095
1096#
1097# MAIN FUNCTION
1098#
1099
1100#mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */
1101# UINT64 *g, /* 512 bits, 8 qwords */
1102# UINT64 *exp, /* 512 bits, 8 qwords */
1103# struct mod_ctx_512 *data)
1104
1105# window size = 5
1106# table size = 2^5 = 32
1107#table_entries equ 32
1108#table_size equ table_entries * 8
# Emit the exported entry point mod_exp_512(result, g, exp, data).
#
# Outline of the emitted code, in order:
#   1. save callee-saved registers, carve out an aligned stack frame, and
#      stash the old stack pointer and the arguments in it
#   2. bring g into Montgomery form (GT) via mont_reduce
#   3. fill the 32-entry table: entry 0 from the initial tmp, then 31 rounds
#      of tmp = mont_mul_a3b(GT, tmp), each stored with swizzle()
#   4. walk the exponent in 5-bit windows from the top: squarings through
#      sqr_reduce, then one table lookup (unswizzle) and one mont_mul_a3b
#   5. convert the result back with a final mont_reduce, conditionally
#      subtract m once, restore registers and return
1109$code.=<<___;
1110.globl mod_exp_512
1111.type mod_exp_512,\@function,4
1112mod_exp_512:
1113 _CET_ENDBR
1114 push %rbp
1115 push %rbx
1116 push %r12
1117 push %r13
1118 push %r14
1119 push %r15
1120
1121 # adjust stack down and then align it with cache boundary
1122 mov %rsp, %r8
1123 sub \$$mem_size, %rsp
1124 and \$-64, %rsp
1125
1126 # store previous stack pointer and arguments
1127 mov %r8, (+$rsp_offset)(%rsp)
1128 mov %rdi, (+$pResult_offset)(%rsp)
1129 mov %rsi, (+$pG_offset)(%rsp)
1130 mov %rcx, (+$pData_offset)(%rsp)
1131.Lbody:
1132 # transform g into montgomery space
1133 # GT = reduce(g * C2) = reduce(g * (2^256))
1134 # reduce expects to have the input in [tmp16]
1135 pxor %xmm4, %xmm4
1136 movdqu (+16*0)(%rsi), %xmm0
1137 movdqu (+16*1)(%rsi), %xmm1
1138 movdqu (+16*2)(%rsi), %xmm2
1139 movdqu (+16*3)(%rsi), %xmm3
1140 movdqa %xmm4, (+$tmp16_offset+16*0)(%rsp)
1141 movdqa %xmm4, (+$tmp16_offset+16*1)(%rsp)
1142 movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp)
1143 movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp)
1144 movdqa %xmm0, (+$tmp16_offset+16*2)(%rsp)
1145 movdqa %xmm1, (+$tmp16_offset+16*3)(%rsp)
1146 movdqa %xmm2, (+$tmp16_offset+16*4)(%rsp)
1147 movdqa %xmm3, (+$tmp16_offset+16*5)(%rsp)
1148
1149 # load pExp before rdx gets blown away
1150 movdqu (+16*0)(%rdx), %xmm0
1151 movdqu (+16*1)(%rdx), %xmm1
1152 movdqu (+16*2)(%rdx), %xmm2
1153 movdqu (+16*3)(%rdx), %xmm3
1154
1155 lea (+$GT_offset)(%rsp), %rbx
1156 mov %rbx, (+$red_result_addr_offset)(%rsp)
1157 call mont_reduce
1158
1159 # Initialize tmp = C
1160 lea (+$tmp_offset)(%rsp), %rcx
1161 xor %rax, %rax
1162 mov %rax, (+8*0)(%rcx)
1163 mov %rax, (+8*1)(%rcx)
1164 mov %rax, (+8*3)(%rcx)
1165 mov %rax, (+8*4)(%rcx)
1166 mov %rax, (+8*5)(%rcx)
1167 mov %rax, (+8*6)(%rcx)
1168 mov %rax, (+8*7)(%rcx)
1169 mov %rax, (+$exp_offset+8*8)(%rsp)
1170 movq \$1, (+8*2)(%rcx)
1171
1172 lea (+$garray_offset)(%rsp), %rbp
1173 mov %rcx, %rsi # pTmp
1174 mov %rbp, %rdi # Garray[][0]
1175___
1176
# Store the initial tmp as the first table entry. swizzle() is defined
# earlier in the file; presumably its arguments are (destination pointer,
# source pointer, two scratch registers), going by the "swizzle(pg, tmp)"
# pseudo-code below - confirm against its definition.
1177 &swizzle("%rdi", "%rcx", "%rax", "%rbx");
1178
1179 # for (rax = 31; rax != 0; rax--) {
1180 # tmp = reduce(tmp * G)
1181 # swizzle(pg, tmp);
1182 # pg += 2; }
1183$code.=<<___;
1184 mov \$31, %rax
1185 mov %rax, (+$i_offset)(%rsp)
1186 mov %rbp, (+$pg_offset)(%rsp)
1187 # rsi -> pTmp
1188 mov %rsi, (+$red_result_addr_offset)(%rsp)
1189 mov (+8*0)(%rsi), %r10
1190 mov (+8*1)(%rsi), %r11
1191 mov (+8*2)(%rsi), %r12
1192 mov (+8*3)(%rsi), %r13
1193 mov (+8*4)(%rsi), %r14
1194 mov (+8*5)(%rsi), %r15
1195 mov (+8*6)(%rsi), %r8
1196 mov (+8*7)(%rsi), %r9
1197init_loop:
1198 lea (+$GT_offset)(%rsp), %rdi
1199 call mont_mul_a3b
1200 lea (+$tmp_offset)(%rsp), %rsi
1201 mov (+$pg_offset)(%rsp), %rbp
1202 add \$2, %rbp
1203 mov %rbp, (+$pg_offset)(%rsp)
1204 mov %rsi, %rcx # rcx = rsi = addr of tmp
1205___
1206
# Store the freshly computed tmp at the advanced table pointer (%rbp = pg).
1207 &swizzle("%rbp", "%rcx", "%rax", "%rbx");
1208$code.=<<___;
1209 mov (+$i_offset)(%rsp), %rax
1210 sub \$1, %rax
1211 mov %rax, (+$i_offset)(%rsp)
1212 jne init_loop
1213
1214 #
1215 # Copy exponent onto stack
1216 movdqa %xmm0, (+$exp_offset+16*0)(%rsp)
1217 movdqa %xmm1, (+$exp_offset+16*1)(%rsp)
1218 movdqa %xmm2, (+$exp_offset+16*2)(%rsp)
1219 movdqa %xmm3, (+$exp_offset+16*3)(%rsp)
1220
1221
1222 #
1223 # Do exponentiation
1224 # Initialize result to G[exp{511:507}]
1225 mov (+$exp_offset+62)(%rsp), %eax
1226 mov %rax, %rdx
1227 shr \$11, %rax
1228 and \$0x07FF, %edx
1229 mov %edx, (+$exp_offset+62)(%rsp)
1230 lea (+$garray_offset)(%rsp,%rax,2), %rsi
1231 mov (+$pResult_offset)(%rsp), %rdx
1232___
1233
# Copy the selected table entry (%rsi) into the result buffer (%rdx).
# unswizzle() is defined earlier in the file; presumably its arguments are
# (destination pointer, source pointer, three scratch registers) - confirm
# against its definition.
1234 &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax");
1235
1236 #
1237 # Loop variables
1238 # rcx = [loop_idx] = index: 510-5 to 0 by 5
1239$code.=<<___;
1240 movq \$505, (+$loop_idx_offset)(%rsp)
1241
1242 mov (+$pResult_offset)(%rsp), %rcx
1243 mov %rcx, (+$red_result_addr_offset)(%rsp)
1244 mov (+8*0)(%rcx), %r10
1245 mov (+8*1)(%rcx), %r11
1246 mov (+8*2)(%rcx), %r12
1247 mov (+8*3)(%rcx), %r13
1248 mov (+8*4)(%rcx), %r14
1249 mov (+8*5)(%rcx), %r15
1250 mov (+8*6)(%rcx), %r8
1251 mov (+8*7)(%rcx), %r9
1252 jmp sqr_2
1253
1254main_loop_a3b:
1255 call sqr_reduce
1256 call sqr_reduce
1257 call sqr_reduce
1258sqr_2:
1259 call sqr_reduce
1260 call sqr_reduce
1261
1262 #
1263 # Do multiply, first look up proper value in Garray
1264 mov (+$loop_idx_offset)(%rsp), %rcx # bit index
1265 mov %rcx, %rax
1266 shr \$4, %rax # rax is word pointer
1267 mov (+$exp_offset)(%rsp,%rax,2), %edx
1268 and \$15, %rcx
1269 shrq %cl, %rdx
1270 and \$0x1F, %rdx
1271
1272 lea (+$garray_offset)(%rsp,%rdx,2), %rsi
1273 lea (+$tmp_offset)(%rsp), %rdx
1274 mov %rdx, %rdi
1275___
1276
# Copy the table entry selected by the current 5-bit window (%rsi) into tmp
# (%rdx); %rdi was set to the same tmp address just above.
1277 &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax");
1278 # rdi = tmp = pG
1279
1280 #
1281 # Call mont_mul_a3b(src1 = rdi -> tmp, src2 = rsi -> result)
1282 # the reduced product is written through [red_res_addr], which was set to result before the loop
1283$code.=<<___;
1284 mov (+$pResult_offset)(%rsp), %rsi
1285 call mont_mul_a3b
1286
1287 #
1288 # finish loop
1289 mov (+$loop_idx_offset)(%rsp), %rcx
1290 sub \$5, %rcx
1291 mov %rcx, (+$loop_idx_offset)(%rsp)
1292 jge main_loop_a3b
1293
1294 #
1295
1296end_main_loop_a3b:
1297 # transform result out of Montgomery space
1298 # result = reduce(result)
1299 mov (+$pResult_offset)(%rsp), %rdx
1300 pxor %xmm4, %xmm4
1301 movdqu (+16*0)(%rdx), %xmm0
1302 movdqu (+16*1)(%rdx), %xmm1
1303 movdqu (+16*2)(%rdx), %xmm2
1304 movdqu (+16*3)(%rdx), %xmm3
1305 movdqa %xmm4, (+$tmp16_offset+16*4)(%rsp)
1306 movdqa %xmm4, (+$tmp16_offset+16*5)(%rsp)
1307 movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp)
1308 movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp)
1309 movdqa %xmm0, (+$tmp16_offset+16*0)(%rsp)
1310 movdqa %xmm1, (+$tmp16_offset+16*1)(%rsp)
1311 movdqa %xmm2, (+$tmp16_offset+16*2)(%rsp)
1312 movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp)
1313 call mont_reduce
1314
1315 # If result > m, subtract m
1316 # load result into r15:r8
1317 mov (+$pResult_offset)(%rsp), %rax
1318 mov (+8*0)(%rax), %r8
1319 mov (+8*1)(%rax), %r9
1320 mov (+8*2)(%rax), %r10
1321 mov (+8*3)(%rax), %r11
1322 mov (+8*4)(%rax), %r12
1323 mov (+8*5)(%rax), %r13
1324 mov (+8*6)(%rax), %r14
1325 mov (+8*7)(%rax), %r15
1326
1327 # subtract m
1328 mov (+$pData_offset)(%rsp), %rbx
1329 add \$$M, %rbx
1330
1331 sub (+8*0)(%rbx), %r8
1332 sbb (+8*1)(%rbx), %r9
1333 sbb (+8*2)(%rbx), %r10
1334 sbb (+8*3)(%rbx), %r11
1335 sbb (+8*4)(%rbx), %r12
1336 sbb (+8*5)(%rbx), %r13
1337 sbb (+8*6)(%rbx), %r14
1338 sbb (+8*7)(%rbx), %r15
1339
1340 # if Carry is clear, replace result with difference
1341 mov (+8*0)(%rax), %rsi
1342 mov (+8*1)(%rax), %rdi
1343 mov (+8*2)(%rax), %rcx
1344 mov (+8*3)(%rax), %rdx
1345 cmovnc %r8, %rsi
1346 cmovnc %r9, %rdi
1347 cmovnc %r10, %rcx
1348 cmovnc %r11, %rdx
1349 mov %rsi, (+8*0)(%rax)
1350 mov %rdi, (+8*1)(%rax)
1351 mov %rcx, (+8*2)(%rax)
1352 mov %rdx, (+8*3)(%rax)
1353
1354 mov (+8*4)(%rax), %rsi
1355 mov (+8*5)(%rax), %rdi
1356 mov (+8*6)(%rax), %rcx
1357 mov (+8*7)(%rax), %rdx
1358 cmovnc %r12, %rsi
1359 cmovnc %r13, %rdi
1360 cmovnc %r14, %rcx
1361 cmovnc %r15, %rdx
1362 mov %rsi, (+8*4)(%rax)
1363 mov %rdi, (+8*5)(%rax)
1364 mov %rcx, (+8*6)(%rax)
1365 mov %rdx, (+8*7)(%rax)
1366
1367 mov (+$rsp_offset)(%rsp), %rsi
1368 mov 0(%rsi),%r15
1369 mov 8(%rsi),%r14
1370 mov 16(%rsi),%r13
1371 mov 24(%rsi),%r12
1372 mov 32(%rsi),%rbx
1373 mov 40(%rsi),%rbp
1374 lea 48(%rsi),%rsp
1375.Lepilogue:
1376 ret
1377.size mod_exp_512, . - mod_exp_512
1378___
1379
sub reg_part {
	# Translate a full-width register name into the name of one of its
	# narrower parts.
	#
	#   $reg   register name such as "%rax" or "%r10"
	#   $conv  "b", "w" or "d"; any other value leaves a named register
	#          untouched
	#
	# Returns the rewritten register name.
	my ($reg, $conv) = @_;

	# numbered registers simply take the selector as a suffix
	return $reg . $conv if $reg =~ /%r[0-9]+/;

	# named registers need a selector-specific rewrite
	my %narrow = (
		"b" => sub { my $r = shift; $r =~ s/%[er]([^x]+)x?/%$1l/; return $r; },
		"w" => sub { my $r = shift; $r =~ s/%[er](.+)/%$1/;       return $r; },
		"d" => sub { my $r = shift; $r =~ s/%[er](.+)/%e$1/;      return $r; },
	);

	my $rewrite = $narrow{$conv};
	return defined($rewrite) ? $rewrite->($reg) : $reg;
}
1388
# Post-process the accumulated template text, then emit it.

# "%reg#b", "%reg#w", "%reg#d" -> name of the matching sub-register
$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
# `expr` -> value of the Perl expression between the backticks
$code =~ s/\`([^\`]*)\`/eval $1/gem;
# "(+expr)" -> value of the parenthesised expression, e.g. (+8*1) becomes 8
$code =~ s/(\(\+[^)]+\))/eval $1/gem;

print $code;

# close() is where a failed flush of buffered output is reported (and, when
# STDOUT is a pipe, a failing consumer as well). Dying here keeps a truncated
# or rejected assembly file from passing as a successful run.
close STDOUT or die "error closing STDOUT: $!";
diff --git a/src/lib/libcrypto/bn/asm/parisc-mont.pl b/src/lib/libcrypto/bn/asm/parisc-mont.pl
deleted file mode 100644
index 0c7aff93b9..0000000000
--- a/src/lib/libcrypto/bn/asm/parisc-mont.pl
+++ /dev/null
@@ -1,985 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# On PA-7100LC this module performs ~90-50% better, less for longer
11# keys, than code generated by gcc 3.2 for PA-RISC 1.1. Latter means
12# that compiler utilized xmpyu instruction to perform 32x32=64-bit
13# multiplication, which in turn means that "baseline" performance was
14# optimal in respect to instruction set capabilities. Fair comparison
15# with vendor compiler is problematic, because OpenSSL doesn't define
16# BN_LLONG [presumably] for historical reasons, which drives compiler
17# toward 4 times 16x16=32-bit multiplications [plus complementary
18# shifts and additions] instead. This means that you should observe
19# several times improvement over code generated by vendor compiler
20# for PA-RISC 1.1, but the "baseline" is far from optimal. The actual
21# improvement coefficient was never collected on PA-7100LC, or any
22# other 1.1 CPU, because I don't have access to such machine with
23# vendor compiler. But to give you a taste, PA-RISC 1.1 code path
24# reportedly outperformed code generated by cc +DA1.1 +O3 by factor
25# of ~5x on PA-8600.
26#
27# On PA-RISC 2.0 it has to compete with pa-risc2[W].s, which is
28# reportedly ~2x faster than vendor compiler generated code [according
29# to comment in pa-risc2[W].s]. Here comes a catch. Execution core of
30# this implementation is actually 32-bit one, in the sense that it
31# operates on 32-bit values. But pa-risc2[W].s operates on arrays of
32# 64-bit BN_LONGs... How do they interoperate then? No problem. This
33# module picks halves of 64-bit values in reverse order and pretends
34# they were 32-bit BN_LONGs. But can 32-bit core compete with "pure"
35# 64-bit code such as pa-risc2[W].s then? Well, the thing is that
36# 32x32=64-bit multiplication is the best even PA-RISC 2.0 can do,
37# i.e. there is no "wider" multiplication like on most other 64-bit
38# platforms. This means that even being effectively 32-bit, this
39# implementation performs "64-bit" computational task in same amount
40# of arithmetic operations, most notably multiplications. It requires
41# more memory references, most notably to tp[num], but this doesn't
42# seem to exhaust memory port capacity. And indeed, dedicated PA-RISC
43# 2.0 code path provides virtually same performance as pa-risc2[W].s:
44# it's ~10% better for shortest key length and ~10% worse for longest
45# one.
46#
47# In case it wasn't clear. The module has two distinct code paths:
48# PA-RISC 1.1 and PA-RISC 2.0 ones. Latter features carry-free 64-bit
49# additions and 64-bit integer loads, not to mention specific
50# instruction scheduling. In 64-bit build naturally only 2.0 code path
51# is assembled. In 32-bit application context both code paths are
52# assembled, PA-RISC 2.0 CPU is detected at run-time and proper path
53# is taken automatically. Also, in 32-bit build the module imposes
54# a couple of limitations: vector lengths have to be even and vector
55# addresses have to be 64-bit aligned. Normally neither is a problem:
56# most common key lengths are even and vectors are commonly malloc-ed,
57# which ensures alignment.
58#
59# Special thanks to polarhome.com for providing HP-UX account on
60# PA-RISC 1.1 machine, and to correspondent who chose to remain
61# anonymous for testing the code on PA-RISC 2.0 machine.
62
63$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
64
65$flavour = shift;
66$output = shift;
67
68open STDOUT,">$output";
69
70if ($flavour =~ /64/) {
71 $LEVEL ="2.0W";
72 $SIZE_T =8;
73 $FRAME_MARKER =80;
74 $SAVED_RP =16;
75 $PUSH ="std";
76 $PUSHMA ="std,ma";
77 $POP ="ldd";
78 $POPMB ="ldd,mb";
79 $BN_SZ =$SIZE_T;
80} else {
81 $LEVEL ="1.1"; #$LEVEL.="\n\t.ALLOW\t2.0";
82 $SIZE_T =4;
83 $FRAME_MARKER =48;
84 $SAVED_RP =20;
85 $PUSH ="stw";
86 $PUSHMA ="stwm";
87 $POP ="ldw";
88 $POPMB ="ldwm";
89 $BN_SZ =$SIZE_T;
90}
91
92$FRAME=8*$SIZE_T+$FRAME_MARKER; # 8 saved regs + frame marker
93 # [+ argument transfer]
94$LOCALS=$FRAME-$FRAME_MARKER;
95$FRAME+=32; # local variables
96
97$tp="%r31";
98$ti1="%r29";
99$ti0="%r28";
100
101$rp="%r26";
102$ap="%r25";
103$bp="%r24";
104$np="%r23";
105$n0="%r22"; # passed through stack in 32-bit
106$num="%r21"; # passed through stack in 32-bit
107$idx="%r20";
108$arrsz="%r19";
109
110$nm1="%r7";
111$nm0="%r6";
112$ab1="%r5";
113$ab0="%r4";
114
115$fp="%r3";
116$hi1="%r2";
117$hi0="%r1";
118
119$xfer=$n0; # accommodates [-16..15] offset in fld[dw]s
120
121$fm0="%fr4"; $fti=$fm0;
122$fbi="%fr5L";
123$fn0="%fr5R";
124$fai="%fr6"; $fab0="%fr7"; $fab1="%fr8";
125$fni="%fr9"; $fnm0="%fr10"; $fnm1="%fr11";
126
127$code=<<___;
128 .LEVEL $LEVEL
129 .text
130
131 .EXPORT bn_mul_mont,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
132 .ALIGN 64
133bn_mul_mont
134 .PROC
135 .CALLINFO FRAME=`$FRAME-8*$SIZE_T`,NO_CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=6
136 .ENTRY
137 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
138 $PUSHMA %r3,$FRAME(%sp)
139 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
140 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
141 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
142 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
143 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
144 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
145 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
146 ldo -$FRAME(%sp),$fp
147___
148$code.=<<___ if ($SIZE_T==4);
149 ldw `-$FRAME_MARKER-4`($fp),$n0
150 ldw `-$FRAME_MARKER-8`($fp),$num
151 nop
152 nop ; alignment
153___
154$code.=<<___ if ($BN_SZ==4);
155 comiclr,<= 6,$num,%r0 ; are vectors long enough?
156 b L\$abort
157 ldi 0,%r28 ; signal "unhandled"
158 add,ev %r0,$num,$num ; is $num even?
159 b L\$abort
160 nop
161 or $ap,$np,$ti1
162 extru,= $ti1,31,3,%r0 ; are ap and np 64-bit aligned?
163 b L\$abort
164 nop
165 nop ; alignment
166 nop
167
168 fldws 0($n0),${fn0}
169 fldws,ma 4($bp),${fbi} ; bp[0]
170___
171$code.=<<___ if ($BN_SZ==8);
172 comib,> 3,$num,L\$abort ; are vectors long enough?
173 ldi 0,%r28 ; signal "unhandled"
174 addl $num,$num,$num ; I operate on 32-bit values
175
176 fldws 4($n0),${fn0} ; only low part of n0
177 fldws 4($bp),${fbi} ; bp[0] in flipped word order
178___
179$code.=<<___;
180 fldds 0($ap),${fai} ; ap[0,1]
181 fldds 0($np),${fni} ; np[0,1]
182
183 sh2addl $num,%r0,$arrsz
184 ldi 31,$hi0
185 ldo 36($arrsz),$hi1 ; space for tp[num+1]
186 andcm $hi1,$hi0,$hi1 ; align
187 addl $hi1,%sp,%sp
188 $PUSH $fp,-$SIZE_T(%sp)
189
190 ldo `$LOCALS+16`($fp),$xfer
191 ldo `$LOCALS+32+4`($fp),$tp
192
193 xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[0]
194 xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[0]
195 xmpyu ${fn0},${fab0}R,${fm0}
196
197 addl $arrsz,$ap,$ap ; point at the end
198 addl $arrsz,$np,$np
199 subi 0,$arrsz,$idx ; j=0
200 ldo 8($idx),$idx ; j++++
201
202 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m
203 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m
204 fstds ${fab0},-16($xfer)
205 fstds ${fnm0},-8($xfer)
206 fstds ${fab1},0($xfer)
207 fstds ${fnm1},8($xfer)
208 flddx $idx($ap),${fai} ; ap[2,3]
209 flddx $idx($np),${fni} ; np[2,3]
210___
211$code.=<<___ if ($BN_SZ==4);
212#ifdef __LP64__
213 mtctl $hi0,%cr11 ; $hi0 still holds 31
214 extrd,u,*= $hi0,%sar,1,$hi0 ; executes on PA-RISC 1.0
215 b L\$parisc11
216 nop
217___
218$code.=<<___; # PA-RISC 2.0 code-path
219 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
220 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
221 ldd -16($xfer),$ab0
222 fstds ${fab0},-16($xfer)
223
224 extrd,u $ab0,31,32,$hi0
225 extrd,u $ab0,63,32,$ab0
226 ldd -8($xfer),$nm0
227 fstds ${fnm0},-8($xfer)
228 ldo 8($idx),$idx ; j++++
229 addl $ab0,$nm0,$nm0 ; low part is discarded
230 extrd,u $nm0,31,32,$hi1
231
232L\$1st
233 xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0]
234 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
235 ldd 0($xfer),$ab1
236 fstds ${fab1},0($xfer)
237 addl $hi0,$ab1,$ab1
238 extrd,u $ab1,31,32,$hi0
239 ldd 8($xfer),$nm1
240 fstds ${fnm1},8($xfer)
241 extrd,u $ab1,63,32,$ab1
242 addl $hi1,$nm1,$nm1
243 flddx $idx($ap),${fai} ; ap[j,j+1]
244 flddx $idx($np),${fni} ; np[j,j+1]
245 addl $ab1,$nm1,$nm1
246 extrd,u $nm1,31,32,$hi1
247
248 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
249 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
250 ldd -16($xfer),$ab0
251 fstds ${fab0},-16($xfer)
252 addl $hi0,$ab0,$ab0
253 extrd,u $ab0,31,32,$hi0
254 ldd -8($xfer),$nm0
255 fstds ${fnm0},-8($xfer)
256 extrd,u $ab0,63,32,$ab0
257 addl $hi1,$nm0,$nm0
258 stw $nm1,-4($tp) ; tp[j-1]
259 addl $ab0,$nm0,$nm0
260 stw,ma $nm0,8($tp) ; tp[j-1]
261 addib,<> 8,$idx,L\$1st ; j++++
262 extrd,u $nm0,31,32,$hi1
263
264 xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[0]
265 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
266 ldd 0($xfer),$ab1
267 fstds ${fab1},0($xfer)
268 addl $hi0,$ab1,$ab1
269 extrd,u $ab1,31,32,$hi0
270 ldd 8($xfer),$nm1
271 fstds ${fnm1},8($xfer)
272 extrd,u $ab1,63,32,$ab1
273 addl $hi1,$nm1,$nm1
274 ldd -16($xfer),$ab0
275 addl $ab1,$nm1,$nm1
276 ldd -8($xfer),$nm0
277 extrd,u $nm1,31,32,$hi1
278
279 addl $hi0,$ab0,$ab0
280 extrd,u $ab0,31,32,$hi0
281 stw $nm1,-4($tp) ; tp[j-1]
282 extrd,u $ab0,63,32,$ab0
283 addl $hi1,$nm0,$nm0
284 ldd 0($xfer),$ab1
285 addl $ab0,$nm0,$nm0
286 ldd,mb 8($xfer),$nm1
287 extrd,u $nm0,31,32,$hi1
288 stw,ma $nm0,8($tp) ; tp[j-1]
289
290 ldo -1($num),$num ; i--
291 subi 0,$arrsz,$idx ; j=0
292___
293$code.=<<___ if ($BN_SZ==4);
294 fldws,ma 4($bp),${fbi} ; bp[1]
295___
296$code.=<<___ if ($BN_SZ==8);
297 fldws 0($bp),${fbi} ; bp[1] in flipped word order
298___
299$code.=<<___;
300 flddx $idx($ap),${fai} ; ap[0,1]
301 flddx $idx($np),${fni} ; np[0,1]
302 fldws 8($xfer),${fti}R ; tp[0]
303 addl $hi0,$ab1,$ab1
304 extrd,u $ab1,31,32,$hi0
305 extrd,u $ab1,63,32,$ab1
306 ldo 8($idx),$idx ; j++++
307 xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[1]
308 xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[1]
309 addl $hi1,$nm1,$nm1
310 addl $ab1,$nm1,$nm1
311 extrd,u $nm1,31,32,$hi1
312 fstws,mb ${fab0}L,-8($xfer) ; save high part
313 stw $nm1,-4($tp) ; tp[j-1]
314
315 fcpy,sgl %fr0,${fti}L ; zero high part
316 fcpy,sgl %fr0,${fab0}L
317 addl $hi1,$hi0,$hi0
318 extrd,u $hi0,31,32,$hi1
319 fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
320 fcnvxf,dbl,dbl ${fab0},${fab0}
321 stw $hi0,0($tp)
322 stw $hi1,4($tp)
323
324 fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
325 fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
326 xmpyu ${fn0},${fab0}R,${fm0}
327 ldo `$LOCALS+32+4`($fp),$tp
328L\$outer
329 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m
330 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m
331 fstds ${fab0},-16($xfer) ; 33-bit value
332 fstds ${fnm0},-8($xfer)
333 flddx $idx($ap),${fai} ; ap[2]
334 flddx $idx($np),${fni} ; np[2]
335 ldo 8($idx),$idx ; j++++
336 ldd -16($xfer),$ab0 ; 33-bit value
337 ldd -8($xfer),$nm0
338 ldw 0($xfer),$hi0 ; high part
339
340 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
341 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
342 extrd,u $ab0,31,32,$ti0 ; carry bit
343 extrd,u $ab0,63,32,$ab0
344 fstds ${fab1},0($xfer)
345 addl $ti0,$hi0,$hi0 ; account carry bit
346 fstds ${fnm1},8($xfer)
347 addl $ab0,$nm0,$nm0 ; low part is discarded
348 ldw 0($tp),$ti1 ; tp[1]
349 extrd,u $nm0,31,32,$hi1
350 fstds ${fab0},-16($xfer)
351 fstds ${fnm0},-8($xfer)
352
353L\$inner
354 xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i]
355 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
356 ldd 0($xfer),$ab1
357 fstds ${fab1},0($xfer)
358 addl $hi0,$ti1,$ti1
359 addl $ti1,$ab1,$ab1
360 ldd 8($xfer),$nm1
361 fstds ${fnm1},8($xfer)
362 extrd,u $ab1,31,32,$hi0
363 extrd,u $ab1,63,32,$ab1
364 flddx $idx($ap),${fai} ; ap[j,j+1]
365 flddx $idx($np),${fni} ; np[j,j+1]
366 addl $hi1,$nm1,$nm1
367 addl $ab1,$nm1,$nm1
368 ldw 4($tp),$ti0 ; tp[j]
369 stw $nm1,-4($tp) ; tp[j-1]
370
371 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
372 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
373 ldd -16($xfer),$ab0
374 fstds ${fab0},-16($xfer)
375 addl $hi0,$ti0,$ti0
376 addl $ti0,$ab0,$ab0
377 ldd -8($xfer),$nm0
378 fstds ${fnm0},-8($xfer)
379 extrd,u $ab0,31,32,$hi0
380 extrd,u $nm1,31,32,$hi1
381 ldw 8($tp),$ti1 ; tp[j]
382 extrd,u $ab0,63,32,$ab0
383 addl $hi1,$nm0,$nm0
384 addl $ab0,$nm0,$nm0
385 stw,ma $nm0,8($tp) ; tp[j-1]
386 addib,<> 8,$idx,L\$inner ; j++++
387 extrd,u $nm0,31,32,$hi1
388
389 xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[i]
390 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
391 ldd 0($xfer),$ab1
392 fstds ${fab1},0($xfer)
393 addl $hi0,$ti1,$ti1
394 addl $ti1,$ab1,$ab1
395 ldd 8($xfer),$nm1
396 fstds ${fnm1},8($xfer)
397 extrd,u $ab1,31,32,$hi0
398 extrd,u $ab1,63,32,$ab1
399 ldw 4($tp),$ti0 ; tp[j]
400 addl $hi1,$nm1,$nm1
401 addl $ab1,$nm1,$nm1
402 ldd -16($xfer),$ab0
403 ldd -8($xfer),$nm0
404 extrd,u $nm1,31,32,$hi1
405
406 addl $hi0,$ab0,$ab0
407 addl $ti0,$ab0,$ab0
408 stw $nm1,-4($tp) ; tp[j-1]
409 extrd,u $ab0,31,32,$hi0
410 ldw 8($tp),$ti1 ; tp[j]
411 extrd,u $ab0,63,32,$ab0
412 addl $hi1,$nm0,$nm0
413 ldd 0($xfer),$ab1
414 addl $ab0,$nm0,$nm0
415 ldd,mb 8($xfer),$nm1
416 extrd,u $nm0,31,32,$hi1
417 stw,ma $nm0,8($tp) ; tp[j-1]
418
419 addib,= -1,$num,L\$outerdone ; i--
420 subi 0,$arrsz,$idx ; j=0
421___
422$code.=<<___ if ($BN_SZ==4);
423 fldws,ma 4($bp),${fbi} ; bp[i]
424___
425$code.=<<___ if ($BN_SZ==8);
426 ldi 12,$ti0 ; bp[i] in flipped word order
427 addl,ev %r0,$num,$num
428 ldi -4,$ti0
429 addl $ti0,$bp,$bp
430 fldws 0($bp),${fbi}
431___
432$code.=<<___;
433 flddx $idx($ap),${fai} ; ap[0]
434 addl $hi0,$ab1,$ab1
435 flddx $idx($np),${fni} ; np[0]
436 fldws 8($xfer),${fti}R ; tp[0]
437 addl $ti1,$ab1,$ab1
438 extrd,u $ab1,31,32,$hi0
439 extrd,u $ab1,63,32,$ab1
440
441 ldo 8($idx),$idx ; j++++
442 xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[i]
443 xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[i]
444 ldw 4($tp),$ti0 ; tp[j]
445
446 addl $hi1,$nm1,$nm1
447 fstws,mb ${fab0}L,-8($xfer) ; save high part
448 addl $ab1,$nm1,$nm1
449 extrd,u $nm1,31,32,$hi1
450 fcpy,sgl %fr0,${fti}L ; zero high part
451 fcpy,sgl %fr0,${fab0}L
452 stw $nm1,-4($tp) ; tp[j-1]
453
454 fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
455 fcnvxf,dbl,dbl ${fab0},${fab0}
456 addl $hi1,$hi0,$hi0
457 fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
458 addl $ti0,$hi0,$hi0
459 extrd,u $hi0,31,32,$hi1
460 fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
461 stw $hi0,0($tp)
462 stw $hi1,4($tp)
463 xmpyu ${fn0},${fab0}R,${fm0}
464
465 b L\$outer
466 ldo `$LOCALS+32+4`($fp),$tp
467
468L\$outerdone
469 addl $hi0,$ab1,$ab1
470 addl $ti1,$ab1,$ab1
471 extrd,u $ab1,31,32,$hi0
472 extrd,u $ab1,63,32,$ab1
473
474 ldw 4($tp),$ti0 ; tp[j]
475
476 addl $hi1,$nm1,$nm1
477 addl $ab1,$nm1,$nm1
478 extrd,u $nm1,31,32,$hi1
479 stw $nm1,-4($tp) ; tp[j-1]
480
481 addl $hi1,$hi0,$hi0
482 addl $ti0,$hi0,$hi0
483 extrd,u $hi0,31,32,$hi1
484 stw $hi0,0($tp)
485 stw $hi1,4($tp)
486
487 ldo `$LOCALS+32`($fp),$tp
488 sub %r0,%r0,%r0 ; clear borrow
489___
490$code.=<<___ if ($BN_SZ==4);
491 ldws,ma 4($tp),$ti0
492 extru,= $rp,31,3,%r0 ; is rp 64-bit aligned?
493 b L\$sub_pa11
494 addl $tp,$arrsz,$tp
495L\$sub
496 ldwx $idx($np),$hi0
497 subb $ti0,$hi0,$hi1
498 ldwx $idx($tp),$ti0
499 addib,<> 4,$idx,L\$sub
500 stws,ma $hi1,4($rp)
501
502 subb $ti0,%r0,$hi1
503 ldo -4($tp),$tp
504___
505$code.=<<___ if ($BN_SZ==8);
506 ldd,ma 8($tp),$ti0
507L\$sub
508 ldd $idx($np),$hi0
509 shrpd $ti0,$ti0,32,$ti0 ; flip word order
510 std $ti0,-8($tp) ; save flipped value
511 sub,db $ti0,$hi0,$hi1
512 ldd,ma 8($tp),$ti0
513 addib,<> 8,$idx,L\$sub
514 std,ma $hi1,8($rp)
515
516 extrd,u $ti0,31,32,$ti0 ; carry in flipped word order
517 sub,db $ti0,%r0,$hi1
518 ldo -8($tp),$tp
519___
520$code.=<<___;
521 and $tp,$hi1,$ap
522 andcm $rp,$hi1,$bp
523 or $ap,$bp,$np
524
525 sub $rp,$arrsz,$rp ; rewind rp
526 subi 0,$arrsz,$idx
527 ldo `$LOCALS+32`($fp),$tp
528L\$copy
529 ldd $idx($np),$hi0
530 std,ma %r0,8($tp)
531 addib,<> 8,$idx,.-8 ; L\$copy
532 std,ma $hi0,8($rp)
533___
534
535if ($BN_SZ==4) { # PA-RISC 1.1 code-path
536$ablo=$ab0;
537$abhi=$ab1;
538$nmlo0=$nm0;
539$nmhi0=$nm1;
540$nmlo1="%r9";
541$nmhi1="%r8";
542
543$code.=<<___;
544 b L\$done
545 nop
546
547 .ALIGN 8
548L\$parisc11
549#endif
550 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
551 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
552 ldw -12($xfer),$ablo
553 ldw -16($xfer),$hi0
554 ldw -4($xfer),$nmlo0
555 ldw -8($xfer),$nmhi0
556 fstds ${fab0},-16($xfer)
557 fstds ${fnm0},-8($xfer)
558
559 ldo 8($idx),$idx ; j++++
560 add $ablo,$nmlo0,$nmlo0 ; discarded
561 addc %r0,$nmhi0,$hi1
562 ldw 4($xfer),$ablo
563 ldw 0($xfer),$abhi
564 nop
565
566L\$1st_pa11
567 xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0]
568 flddx $idx($ap),${fai} ; ap[j,j+1]
569 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
570 flddx $idx($np),${fni} ; np[j,j+1]
571 add $hi0,$ablo,$ablo
572 ldw 12($xfer),$nmlo1
573 addc %r0,$abhi,$hi0
574 ldw 8($xfer),$nmhi1
575 add $ablo,$nmlo1,$nmlo1
576 fstds ${fab1},0($xfer)
577 addc %r0,$nmhi1,$nmhi1
578 fstds ${fnm1},8($xfer)
579 add $hi1,$nmlo1,$nmlo1
580 ldw -12($xfer),$ablo
581 addc %r0,$nmhi1,$hi1
582 ldw -16($xfer),$abhi
583
584 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
585 ldw -4($xfer),$nmlo0
586 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
587 ldw -8($xfer),$nmhi0
588 add $hi0,$ablo,$ablo
589 stw $nmlo1,-4($tp) ; tp[j-1]
590 addc %r0,$abhi,$hi0
591 fstds ${fab0},-16($xfer)
592 add $ablo,$nmlo0,$nmlo0
593 fstds ${fnm0},-8($xfer)
594 addc %r0,$nmhi0,$nmhi0
595 ldw 0($xfer),$abhi
596 add $hi1,$nmlo0,$nmlo0
597 ldw 4($xfer),$ablo
598 stws,ma $nmlo0,8($tp) ; tp[j-1]
599 addib,<> 8,$idx,L\$1st_pa11 ; j++++
600 addc %r0,$nmhi0,$hi1
601
602 ldw 8($xfer),$nmhi1
603 ldw 12($xfer),$nmlo1
604 xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[0]
605 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
606 add $hi0,$ablo,$ablo
607 fstds ${fab1},0($xfer)
608 addc %r0,$abhi,$hi0
609 fstds ${fnm1},8($xfer)
610 add $ablo,$nmlo1,$nmlo1
611 ldw -16($xfer),$abhi
612 addc %r0,$nmhi1,$nmhi1
613 ldw -12($xfer),$ablo
614 add $hi1,$nmlo1,$nmlo1
615 ldw -8($xfer),$nmhi0
616 addc %r0,$nmhi1,$hi1
617 ldw -4($xfer),$nmlo0
618
619 add $hi0,$ablo,$ablo
620 stw $nmlo1,-4($tp) ; tp[j-1]
621 addc %r0,$abhi,$hi0
622 ldw 0($xfer),$abhi
623 add $ablo,$nmlo0,$nmlo0
624 ldw 4($xfer),$ablo
625 addc %r0,$nmhi0,$nmhi0
626 ldws,mb 8($xfer),$nmhi1
627 add $hi1,$nmlo0,$nmlo0
628 ldw 4($xfer),$nmlo1
629 addc %r0,$nmhi0,$hi1
630 stws,ma $nmlo0,8($tp) ; tp[j-1]
631
632 ldo -1($num),$num ; i--
633 subi 0,$arrsz,$idx ; j=0
634
635 fldws,ma 4($bp),${fbi} ; bp[1]
636 flddx $idx($ap),${fai} ; ap[0,1]
637 flddx $idx($np),${fni} ; np[0,1]
638 fldws 8($xfer),${fti}R ; tp[0]
639 add $hi0,$ablo,$ablo
640 addc %r0,$abhi,$hi0
641 ldo 8($idx),$idx ; j++++
642 xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[1]
643 xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[1]
644 add $hi1,$nmlo1,$nmlo1
645 addc %r0,$nmhi1,$nmhi1
646 add $ablo,$nmlo1,$nmlo1
647 addc %r0,$nmhi1,$hi1
648 fstws,mb ${fab0}L,-8($xfer) ; save high part
649 stw $nmlo1,-4($tp) ; tp[j-1]
650
651 fcpy,sgl %fr0,${fti}L ; zero high part
652 fcpy,sgl %fr0,${fab0}L
653 add $hi1,$hi0,$hi0
654 addc %r0,%r0,$hi1
655 fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
656 fcnvxf,dbl,dbl ${fab0},${fab0}
657 stw $hi0,0($tp)
658 stw $hi1,4($tp)
659
660 fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
661 fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
662 xmpyu ${fn0},${fab0}R,${fm0}
663 ldo `$LOCALS+32+4`($fp),$tp
664L\$outer_pa11
665 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m
666 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m
667 fstds ${fab0},-16($xfer) ; 33-bit value
668 fstds ${fnm0},-8($xfer)
669 flddx $idx($ap),${fai} ; ap[2,3]
670 flddx $idx($np),${fni} ; np[2,3]
671 ldw -16($xfer),$abhi ; carry bit actually
672 ldo 8($idx),$idx ; j++++
673 ldw -12($xfer),$ablo
674 ldw -8($xfer),$nmhi0
675 ldw -4($xfer),$nmlo0
676 ldw 0($xfer),$hi0 ; high part
677
678 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
679 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
680 fstds ${fab1},0($xfer)
681 addl $abhi,$hi0,$hi0 ; account carry bit
682 fstds ${fnm1},8($xfer)
683 add $ablo,$nmlo0,$nmlo0 ; discarded
684 ldw 0($tp),$ti1 ; tp[1]
685 addc %r0,$nmhi0,$hi1
686 fstds ${fab0},-16($xfer)
687 fstds ${fnm0},-8($xfer)
688 ldw 4($xfer),$ablo
689 ldw 0($xfer),$abhi
690
691L\$inner_pa11
692 xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i]
693 flddx $idx($ap),${fai} ; ap[j,j+1]
694 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
695 flddx $idx($np),${fni} ; np[j,j+1]
696 add $hi0,$ablo,$ablo
697 ldw 4($tp),$ti0 ; tp[j]
698 addc %r0,$abhi,$abhi
699 ldw 12($xfer),$nmlo1
700 add $ti1,$ablo,$ablo
701 ldw 8($xfer),$nmhi1
702 addc %r0,$abhi,$hi0
703 fstds ${fab1},0($xfer)
704 add $ablo,$nmlo1,$nmlo1
705 fstds ${fnm1},8($xfer)
706 addc %r0,$nmhi1,$nmhi1
707 ldw -12($xfer),$ablo
708 add $hi1,$nmlo1,$nmlo1
709 ldw -16($xfer),$abhi
710 addc %r0,$nmhi1,$hi1
711
712 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
713 ldw 8($tp),$ti1 ; tp[j]
714 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
715 ldw -4($xfer),$nmlo0
716 add $hi0,$ablo,$ablo
717 ldw -8($xfer),$nmhi0
718 addc %r0,$abhi,$abhi
719 stw $nmlo1,-4($tp) ; tp[j-1]
720 add $ti0,$ablo,$ablo
721 fstds ${fab0},-16($xfer)
722 addc %r0,$abhi,$hi0
723 fstds ${fnm0},-8($xfer)
724 add $ablo,$nmlo0,$nmlo0
725 ldw 4($xfer),$ablo
726 addc %r0,$nmhi0,$nmhi0
727 ldw 0($xfer),$abhi
728 add $hi1,$nmlo0,$nmlo0
729 stws,ma $nmlo0,8($tp) ; tp[j-1]
730 addib,<> 8,$idx,L\$inner_pa11 ; j++++
731 addc %r0,$nmhi0,$hi1
732
733 xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[i]
734 ldw 12($xfer),$nmlo1
735 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
736 ldw 8($xfer),$nmhi1
737 add $hi0,$ablo,$ablo
738 ldw 4($tp),$ti0 ; tp[j]
739 addc %r0,$abhi,$abhi
740 fstds ${fab1},0($xfer)
741 add $ti1,$ablo,$ablo
742 fstds ${fnm1},8($xfer)
743 addc %r0,$abhi,$hi0
744 ldw -16($xfer),$abhi
745 add $ablo,$nmlo1,$nmlo1
746 ldw -12($xfer),$ablo
747 addc %r0,$nmhi1,$nmhi1
748 ldw -8($xfer),$nmhi0
749 add $hi1,$nmlo1,$nmlo1
750 ldw -4($xfer),$nmlo0
751 addc %r0,$nmhi1,$hi1
752
753 add $hi0,$ablo,$ablo
754 stw $nmlo1,-4($tp) ; tp[j-1]
755 addc %r0,$abhi,$abhi
756 add $ti0,$ablo,$ablo
757 ldw 8($tp),$ti1 ; tp[j]
758 addc %r0,$abhi,$hi0
759 ldw 0($xfer),$abhi
760 add $ablo,$nmlo0,$nmlo0
761 ldw 4($xfer),$ablo
762 addc %r0,$nmhi0,$nmhi0
763 ldws,mb 8($xfer),$nmhi1
764 add $hi1,$nmlo0,$nmlo0
765 ldw 4($xfer),$nmlo1
766 addc %r0,$nmhi0,$hi1
767 stws,ma $nmlo0,8($tp) ; tp[j-1]
768
769 addib,= -1,$num,L\$outerdone_pa11; i--
770 subi 0,$arrsz,$idx ; j=0
771
772 fldws,ma 4($bp),${fbi} ; bp[i]
773 flddx $idx($ap),${fai} ; ap[0]
774 add $hi0,$ablo,$ablo
775 addc %r0,$abhi,$abhi
776 flddx $idx($np),${fni} ; np[0]
777 fldws 8($xfer),${fti}R ; tp[0]
778 add $ti1,$ablo,$ablo
779 addc %r0,$abhi,$hi0
780
781 ldo 8($idx),$idx ; j++++
782 xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[i]
783 xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[i]
784 ldw 4($tp),$ti0 ; tp[j]
785
786 add $hi1,$nmlo1,$nmlo1
787 addc %r0,$nmhi1,$nmhi1
788 fstws,mb ${fab0}L,-8($xfer) ; save high part
789 add $ablo,$nmlo1,$nmlo1
790 addc %r0,$nmhi1,$hi1
791 fcpy,sgl %fr0,${fti}L ; zero high part
792 fcpy,sgl %fr0,${fab0}L
793 stw $nmlo1,-4($tp) ; tp[j-1]
794
795 fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
796 fcnvxf,dbl,dbl ${fab0},${fab0}
797 add $hi1,$hi0,$hi0
798 addc %r0,%r0,$hi1
799 fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
800 add $ti0,$hi0,$hi0
801 addc %r0,$hi1,$hi1
802 fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
803 stw $hi0,0($tp)
804 stw $hi1,4($tp)
805 xmpyu ${fn0},${fab0}R,${fm0}
806
807 b L\$outer_pa11
808 ldo `$LOCALS+32+4`($fp),$tp
809
810L\$outerdone_pa11
811 add $hi0,$ablo,$ablo
812 addc %r0,$abhi,$abhi
813 add $ti1,$ablo,$ablo
814 addc %r0,$abhi,$hi0
815
816 ldw 4($tp),$ti0 ; tp[j]
817
818 add $hi1,$nmlo1,$nmlo1
819 addc %r0,$nmhi1,$nmhi1
820 add $ablo,$nmlo1,$nmlo1
821 addc %r0,$nmhi1,$hi1
822 stw $nmlo1,-4($tp) ; tp[j-1]
823
824 add $hi1,$hi0,$hi0
825 addc %r0,%r0,$hi1
826 add $ti0,$hi0,$hi0
827 addc %r0,$hi1,$hi1
828 stw $hi0,0($tp)
829 stw $hi1,4($tp)
830
831 ldo `$LOCALS+32+4`($fp),$tp
832 sub %r0,%r0,%r0 ; clear borrow
833 ldw -4($tp),$ti0
834 addl $tp,$arrsz,$tp
835L\$sub_pa11
836 ldwx $idx($np),$hi0
837 subb $ti0,$hi0,$hi1
838 ldwx $idx($tp),$ti0
839 addib,<> 4,$idx,L\$sub_pa11
840 stws,ma $hi1,4($rp)
841
842 subb $ti0,%r0,$hi1
843 ldo -4($tp),$tp
844 and $tp,$hi1,$ap
845 andcm $rp,$hi1,$bp
846 or $ap,$bp,$np
847
848 sub $rp,$arrsz,$rp ; rewind rp
849 subi 0,$arrsz,$idx
850 ldo `$LOCALS+32`($fp),$tp
851L\$copy_pa11
852 ldwx $idx($np),$hi0
853 stws,ma %r0,4($tp)
854 addib,<> 4,$idx,L\$copy_pa11
855 stws,ma $hi0,4($rp)
856
857 nop ; alignment
858L\$done
859___
860}
861
862$code.=<<___;
863 ldi 1,%r28 ; signal "handled"
864 ldo $FRAME($fp),%sp ; destroy tp[num+1]
865
866 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
867 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
868 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
869 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
870 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
871 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
872 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
873 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
874L\$abort
875 bv (%r2)
876 .EXIT
877 $POPMB -$FRAME(%sp),%r3
878 .PROCEND
879___
880
881# Explicitly encode PA-RISC 2.0 instructions used in this module, so
882# that it can be compiled with .LEVEL 1.0. It should be noted that I
883# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
884# directive...
885
# Encoder for the "ldd" instruction. Called by assemble() with the
# completer string ($mod, e.g. "", ",ma", ",mb") and the first operand
# field ($args). Returns one line of assembler text: a raw ".WORD" with
# the computed opcode (the original mnemonic is kept as a trailing
# comment), or the untouched instruction if $args matches neither form.
#   - indexed form     %rX(%rB),%rT  : X, B and T are packed into the word;
#   - displacement form d(%rB),%rT   : only the low five bits of d are
#     encoded (low four at bit 17, bit 0x10 at bit 16), so d is presumably
#     expected to fit the short-displacement range -- NOTE(review): confirm.
886my $ldd = sub {
887 my ($mod,$args) = @_;
888 my $orig = "ldd$mod\t$args";
889
890 if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4
891 { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
892 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
893 }
894 elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5
895 { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
896 $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset
# any ",m..." completer sets bit 5; ",mb" additionally sets bit 13
897 $opcode|=(1<<5) if ($mod =~ /^,m/);
898 $opcode|=(1<<13) if ($mod =~ /^,mb/);
899 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
900 }
901 else { "\t".$orig; }
902};
903
# Encoder for the "std" instruction, same calling convention as $ldd:
# ($mod, $args) in, one line of assembler text out. Only the
# "%rS,d(%rB)" displacement form is hand-encoded; anything else is passed
# through as the original mnemonic. As in $ldd, just the low five bits of
# the displacement are encoded (low four at bit 1, bit 0x10 at bit 0).
904my $std = sub {
905 my ($mod,$args) = @_;
906 my $orig = "std$mod\t$args";
907
908 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 6
909 { my $opcode=(0x03<<26)|($3<<21)|($1<<16)|(1<<12)|(0xB<<6);
910 $opcode|=(($2&0xF)<<1)|(($2&0x10)>>4); # encode offset
# any ",m..." completer sets bit 5; ",mb" additionally sets bit 13
911 $opcode|=(1<<5) if ($mod =~ /^,m/);
912 $opcode|=(1<<13) if ($mod =~ /^,mb/);
913 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
914 }
915 else { "\t".$orig; }
916};
917
# Encoder for the "extrd" instruction; ($mod, $args) in, one line of
# assembler text out. Two operand shapes are hand-encoded:
#   - fixed position    %rS,pos,len,%rT
#   - variable position %rS,%sar,len,%rT
# In both, the length field is stored as (32 - len) split into a low
# five-bit part and one high bit. The completer in $mod is not parsed
# except in the %sar form, where a ",=" / ",*=" condition sets bit 13.
# Any other operand shape is passed through unencoded.
918my $extrd = sub {
919 my ($mod,$args) = @_;
920 my $orig = "extrd$mod\t$args";
921
922 # I only have ",u" completer, it's implicitly encoded...
923 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
924 { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
925 my $len=32-$3;
926 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
927 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
928 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
929 }
930 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
931 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
932 my $len=32-$2;
933 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
934 $opcode |= (1<<13) if ($mod =~ /,\**=/);
935 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
936 }
937 else { "\t".$orig; }
938};
939
# Encoder for the "shrpd" instruction with an immediate shift amount:
# "%r1,%r2,sa,%rT". The shift amount is stored as (63 - sa), split into a
# low five-bit part and one high bit. $mod is only used to rebuild the
# original text for the trailing comment. Other operand shapes are passed
# through unencoded.
940my $shrpd = sub {
941 my ($mod,$args) = @_;
942 my $orig = "shrpd$mod\t$args";
943
944 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
945 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
946 my $cpos=63-$3;
947 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
948 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
949 }
950 else { "\t".$orig; }
951};
952
# Encoder for "sub". Only the exact ",db" completer with three general
# registers ("%r1,%r2,%rT") is hand-encoded into a ".WORD"; every other
# "sub" variant (plain "sub", other completers) is returned as the
# original mnemonic for the assembler to handle.
953my $sub = sub {
954 my ($mod,$args) = @_;
955 my $orig = "sub$mod\t$args";
956
957 if ($mod eq ",db" && $args =~ /%r([0-9]+),%r([0-9]+),%r([0-9]+)/) {
958 my $opcode=(0x02<<26)|($2<<21)|($1<<16)|$3;
959 $opcode|=(1<<10); # e1
960 $opcode|=(1<<8); # e2
961 $opcode|=(1<<5); # d
962 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig
963 }
964 else { "\t".$orig; }
965};
966
# assemble(MNEMONIC, MOD, ARGS): dispatch one parsed instruction to its
# hand-encoder, if there is one. The string eval looks up a variable
# named after the mnemonic (e.g. "ldd" -> $ldd); when that variable holds
# a code reference (the closures defined above) it is called with
# (MOD, ARGS) and its result returned. Otherwise the instruction is
# re-emitted as "\tMNEMONIC MOD\tARGS" unchanged.
967sub assemble {
968 my ($mnemonic,$mod,$args)=@_;
969 my $opcode = eval("\$$mnemonic");
970
971 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
972}
973
# Final pass: rewrite the accumulated $code line by line and print it.
# Order matters: backtick expressions are evaluated first so that the
# later patterns see literal numbers and register names.
974foreach (split("\n",$code)) {
975 s/\`([^\`]*)\`/eval $1/ge;
976 # flip word order in 64-bit mode...
977 s/(xmpyu\s+)($fai|$fni)([LR])/$1.$2.($3 eq "L"?"R":"L")/e if ($BN_SZ==8);
978 # assemble 2.0 instructions in 32-bit mode...
# (splits an indented line into mnemonic, completer and first operand field)
979 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4);
980
# with 8-byte SIZE_T every "bv" is emitted as "bve"
981 s/\bbv\b/bve/gm if ($SIZE_T==8);
982
983 print $_,"\n";
984}
985close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/ppc-mont.pl b/src/lib/libcrypto/bn/asm/ppc-mont.pl
deleted file mode 100644
index 68320a87f7..0000000000
--- a/src/lib/libcrypto/bn/asm/ppc-mont.pl
+++ /dev/null
@@ -1,329 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# April 2006
11
12# "Teaser" Montgomery multiplication module for PowerPC. It's possible
13# to gain a bit more by modulo-scheduling outer loop, then dedicated
14# squaring procedure should give further 20% and code can be adapted
15# for 32-bit application running on 64-bit CPU. As for the latter,
16# it won't be able to achieve "native" 64-bit performance, because in
17# 32-bit application context every addc instruction will have to be
18# expanded as addc, twice right shift by 32 and finally adde, etc.
19# So far RSA *sign* performance improvement over pre-bn_mul_mont asm
20# for 64-bit application running on PPC970/G5 is:
21#
22# 512-bit +65%
23# 1024-bit +35%
24# 2048-bit +18%
25# 4096-bit +4%
26
# The first command-line argument names the target flavour; it selects the
# word size and the matching set of load/store/multiply mnemonics.
$flavour = shift;

if ($flavour =~ /32/) {
	($BITS, $SIZE_T, $RZONE) = (32, 4, 224);

	($LD, $LDU, $LDX)	 = ("lwz", "lwzu", "lwzx");	# load, load and update, load indexed
	($ST, $STU, $STX, $STUX) = ("stw", "stwu", "stwx", "stwux");	# store, store and update, store indexed, store indexed and update
	($UMULL, $UMULH)	 = ("mullw", "mulhwu");		# unsigned multiply low, high
	$UCMP			 = "cmplw";			# unsigned compare
	$SHRI			 = "srwi";			# unsigned shift right by immediate
} elsif ($flavour =~ /64/) {
	($BITS, $SIZE_T, $RZONE) = (64, 8, 288);

	# same as above, but 64-bit mnemonics...
	($LD, $LDU, $LDX)	 = ("ld", "ldu", "ldx");	# load, load and update, load indexed
	($ST, $STU, $STX, $STUX) = ("std", "stdu", "stdx", "stdux");	# store, store and update, store indexed, store indexed and update
	($UMULL, $UMULH)	 = ("mulld", "mulhdu");		# unsigned multiply low, high
	$UCMP			 = "cmpld";			# unsigned compare
	$SHRI			 = "srdi";			# unsigned shift right by immediate
} else { die "nonsense $flavour"; }

# Values derived identically for both flavours.
$BNSZ	= $BITS/8;		# bytes per bignum word
$PUSH	= $ST;
$POP	= $LD;

$LOCALS	= 8*$SIZE_T;
$FRAME	= $LOCALS+$RZONE;
72
# Locate ppc-xlate.pl: first next to this script, then in ../../perlasm/
# relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe everything printed on STDOUT through the translator; the next
# command-line argument is handed to it as its last argument.
# The check must use low-precedence "or": with "||" the test applied to the
# (always true) command string, so a failed open() was never reported.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
79
# Symbolic names for the registers used by the assembly template.
($sp, $toc) = ("r1", "r2");
# r3 carries rp on entry; the template moves it into r9 ("mr $rp,r3") so
# that r3 can double as the overflow word $ovf.
($ovf, $ap, $bp, $np, $n0, $num) = map("r$_", 3..8);
$rp = "r9";	# $rp is reassigned
($aj, $nj, $tj) = map("r$_", 10..12);
# non-volatile registers
($i, $j, $tp, $m0, $m1, $lo0, $hi0, $lo1, $hi1, $alo, $ahi, $nlo) =
	map("r$_", 20..31);
#
$nhi = "r0";
107
# Start of the assembly template: section directives and the entry of
# .bn_mul_mont.  The incoming r3 is copied to $rp, r3 is preset to 0, and
# the routine returns at once (bltlr) when $num is below 4.
108$code=<<___;
109.machine "any"
110.text
111
112.globl .bn_mul_mont
113.align 4
114.bn_mul_mont:
115 cmpwi $num,4
116 mr $rp,r3 ; $rp is reassigned
117 li r3,0
118 bltlr
119___
# Appended only when words are 4 bytes: also return early (bgelr, with r3
# still 0) when $num is 32 or more.
120$code.=<<___ if ($BNSZ==4);
121 cmpwi $num,32 ; longer key performance is not better
122 bgelr
123___
# Main body of bn_mul_mont, appended for every flavour.  In order:
#  - allocate a stack frame sized from $num words plus $FRAME, with the
#    address masked by -4096, and save r20-r31 just below the old $sp;
#  - first pass (L1st): ap[j]*bp[0] + np[j]*m1, written to tp[];
#  - Louter/Linner: for each following bp[i], ap[j]*bp[i] + np[j]*m1 + tp[j],
#    with the top overflow bit kept in $ovf;
#  - Lsub: store tp[j]-np[j] into rp[];
#  - Lcopy: copy tp[] or rp[] (selected by borrow/overflow) into rp[] and
#    overwrite tp[];
#  - restore r20-r31 and $sp, return 1 in r3.
# The `...` expressions are evaluated later, by the s///gem near the end of
# this script.
124$code.=<<___;
125 slwi $num,$num,`log($BNSZ)/log(2)`
126 li $tj,-4096
127 addi $ovf,$num,$FRAME
128 subf $ovf,$ovf,$sp ; $sp-$ovf
129 and $ovf,$ovf,$tj ; minimize TLB usage
130 subf $ovf,$sp,$ovf ; $ovf-$sp
131 mr $tj,$sp
132 srwi $num,$num,`log($BNSZ)/log(2)`
133 $STUX $sp,$sp,$ovf
134
135 $PUSH r20,`-12*$SIZE_T`($tj)
136 $PUSH r21,`-11*$SIZE_T`($tj)
137 $PUSH r22,`-10*$SIZE_T`($tj)
138 $PUSH r23,`-9*$SIZE_T`($tj)
139 $PUSH r24,`-8*$SIZE_T`($tj)
140 $PUSH r25,`-7*$SIZE_T`($tj)
141 $PUSH r26,`-6*$SIZE_T`($tj)
142 $PUSH r27,`-5*$SIZE_T`($tj)
143 $PUSH r28,`-4*$SIZE_T`($tj)
144 $PUSH r29,`-3*$SIZE_T`($tj)
145 $PUSH r30,`-2*$SIZE_T`($tj)
146 $PUSH r31,`-1*$SIZE_T`($tj)
147
148 $LD $n0,0($n0) ; pull n0[0] value
149 addi $num,$num,-2 ; adjust $num for counter register
150
151 $LD $m0,0($bp) ; m0=bp[0]
152 $LD $aj,0($ap) ; ap[0]
153 addi $tp,$sp,$LOCALS
154 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0]
155 $UMULH $hi0,$aj,$m0
156
157 $LD $aj,$BNSZ($ap) ; ap[1]
158 $LD $nj,0($np) ; np[0]
159
160 $UMULL $m1,$lo0,$n0 ; "tp[0]"*n0
161
162 $UMULL $alo,$aj,$m0 ; ap[1]*bp[0]
163 $UMULH $ahi,$aj,$m0
164
165 $UMULL $lo1,$nj,$m1 ; np[0]*m1
166 $UMULH $hi1,$nj,$m1
167 $LD $nj,$BNSZ($np) ; np[1]
168 addc $lo1,$lo1,$lo0
169 addze $hi1,$hi1
170
171 $UMULL $nlo,$nj,$m1 ; np[1]*m1
172 $UMULH $nhi,$nj,$m1
173
174 mtctr $num
175 li $j,`2*$BNSZ`
176.align 4
177L1st:
178 $LDX $aj,$ap,$j ; ap[j]
179 addc $lo0,$alo,$hi0
180 $LDX $nj,$np,$j ; np[j]
181 addze $hi0,$ahi
182 $UMULL $alo,$aj,$m0 ; ap[j]*bp[0]
183 addc $lo1,$nlo,$hi1
184 $UMULH $ahi,$aj,$m0
185 addze $hi1,$nhi
186 $UMULL $nlo,$nj,$m1 ; np[j]*m1
187 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
188 $UMULH $nhi,$nj,$m1
189 addze $hi1,$hi1
190 $ST $lo1,0($tp) ; tp[j-1]
191
192 addi $j,$j,$BNSZ ; j++
193 addi $tp,$tp,$BNSZ ; tp++
194 bdnz- L1st
195;L1st
196 addc $lo0,$alo,$hi0
197 addze $hi0,$ahi
198
199 addc $lo1,$nlo,$hi1
200 addze $hi1,$nhi
201 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
202 addze $hi1,$hi1
203 $ST $lo1,0($tp) ; tp[j-1]
204
205 li $ovf,0
206 addc $hi1,$hi1,$hi0
207 addze $ovf,$ovf ; upmost overflow bit
208 $ST $hi1,$BNSZ($tp)
209
210 li $i,$BNSZ
211.align 4
212Louter:
213 $LDX $m0,$bp,$i ; m0=bp[i]
214 $LD $aj,0($ap) ; ap[0]
215 addi $tp,$sp,$LOCALS
216 $LD $tj,$LOCALS($sp); tp[0]
217 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i]
218 $UMULH $hi0,$aj,$m0
219 $LD $aj,$BNSZ($ap) ; ap[1]
220 $LD $nj,0($np) ; np[0]
221 addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0]
222 $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
223 addze $hi0,$hi0
224 $UMULL $m1,$lo0,$n0 ; tp[0]*n0
225 $UMULH $ahi,$aj,$m0
226 $UMULL $lo1,$nj,$m1 ; np[0]*m1
227 $UMULH $hi1,$nj,$m1
228 $LD $nj,$BNSZ($np) ; np[1]
229 addc $lo1,$lo1,$lo0
230 $UMULL $nlo,$nj,$m1 ; np[1]*m1
231 addze $hi1,$hi1
232 $UMULH $nhi,$nj,$m1
233
234 mtctr $num
235 li $j,`2*$BNSZ`
236.align 4
237Linner:
238 $LDX $aj,$ap,$j ; ap[j]
239 addc $lo0,$alo,$hi0
240 $LD $tj,$BNSZ($tp) ; tp[j]
241 addze $hi0,$ahi
242 $LDX $nj,$np,$j ; np[j]
243 addc $lo1,$nlo,$hi1
244 $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
245 addze $hi1,$nhi
246 $UMULH $ahi,$aj,$m0
247 addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
248 $UMULL $nlo,$nj,$m1 ; np[j]*m1
249 addze $hi0,$hi0
250 $UMULH $nhi,$nj,$m1
251 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
252 addi $j,$j,$BNSZ ; j++
253 addze $hi1,$hi1
254 $ST $lo1,0($tp) ; tp[j-1]
255 addi $tp,$tp,$BNSZ ; tp++
256 bdnz- Linner
257;Linner
258 $LD $tj,$BNSZ($tp) ; tp[j]
259 addc $lo0,$alo,$hi0
260 addze $hi0,$ahi
261 addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
262 addze $hi0,$hi0
263
264 addc $lo1,$nlo,$hi1
265 addze $hi1,$nhi
266 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
267 addze $hi1,$hi1
268 $ST $lo1,0($tp) ; tp[j-1]
269
270 addic $ovf,$ovf,-1 ; move upmost overflow to XER[CA]
271 li $ovf,0
272 adde $hi1,$hi1,$hi0
273 addze $ovf,$ovf
274 $ST $hi1,$BNSZ($tp)
275;
276 slwi $tj,$num,`log($BNSZ)/log(2)`
277 $UCMP $i,$tj
278 addi $i,$i,$BNSZ
279 ble- Louter
280
281 addi $num,$num,2 ; restore $num
282 subfc $j,$j,$j ; j=0 and "clear" XER[CA]
283 addi $tp,$sp,$LOCALS
284 mtctr $num
285
286.align 4
287Lsub: $LDX $tj,$tp,$j
288 $LDX $nj,$np,$j
289 subfe $aj,$nj,$tj ; tp[j]-np[j]
290 $STX $aj,$rp,$j
291 addi $j,$j,$BNSZ
292 bdnz- Lsub
293
294 li $j,0
295 mtctr $num
296 subfe $ovf,$j,$ovf ; handle upmost overflow bit
297 and $ap,$tp,$ovf
298 andc $np,$rp,$ovf
299 or $ap,$ap,$np ; ap=borrow?tp:rp
300
301.align 4
302Lcopy: ; copy or in-place refresh
303 $LDX $tj,$ap,$j
304 $STX $tj,$rp,$j
305 $STX $j,$tp,$j ; zap at once
306 addi $j,$j,$BNSZ
307 bdnz- Lcopy
308
309 $POP $tj,0($sp)
310 li r3,1
311 $POP r20,`-12*$SIZE_T`($tj)
312 $POP r21,`-11*$SIZE_T`($tj)
313 $POP r22,`-10*$SIZE_T`($tj)
314 $POP r23,`-9*$SIZE_T`($tj)
315 $POP r24,`-8*$SIZE_T`($tj)
316 $POP r25,`-7*$SIZE_T`($tj)
317 $POP r26,`-6*$SIZE_T`($tj)
318 $POP r27,`-5*$SIZE_T`($tj)
319 $POP r28,`-4*$SIZE_T`($tj)
320 $POP r29,`-3*$SIZE_T`($tj)
321 $POP r30,`-2*$SIZE_T`($tj)
322 $POP r31,`-1*$SIZE_T`($tj)
323 mr $sp,$tj
324 blr
325___
326
# Evaluate every `...` expression in the template, then send the result
# down the pipe that STDOUT was reopened onto.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# STDOUT is a pipe to the translator here, so close() also waits for that
# process and returns false if it failed.  Propagate that instead of exiting
# successfully with missing or partial output.
close STDOUT or die "error closing STDOUT: $! (status $?)";
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl
deleted file mode 100644
index c9b7f9477d..0000000000
--- a/src/lib/libcrypto/bn/asm/ppc.pl
+++ /dev/null
@@ -1,1968 +0,0 @@
1#!/usr/bin/env perl
2#
3# Implemented as a Perl wrapper as we want to support several different
4# architectures with single file. We pick up the target based on the
5# file name we are asked to generate.
6#
7# It should be noted though that this perl code is nothing like
8# <openssl>/crypto/perlasm/x86*. In this case perl is used pretty much
9# as pre-processor to cover for platform differences in name decoration,
10# linker tables, 32-/64-bit instruction sets...
11#
12# As you might know there are several PowerPC ABIs in use. Most notably
13# Linux and AIX use different 32-bit ABIs. The good news is that these ABIs
14# are similar enough to implement leaf(!) functions, which would be ABI
15# neutral. And that's what you find here: ABI neutral leaf functions.
16# In case you wonder what that is...
17#
18# AIX performance
19#
20# MEASUREMENTS WITH cc ON a 200 MHz PowerPC 604e.
21#
22# The following is the performance of 32-bit compiler
23# generated code:
24#
25# OpenSSL 0.9.6c 21 dec 2001
26# built on: Tue Jun 11 11:06:51 EDT 2002
27# options:bn(64,32) ...
28#compiler: cc -DTHREADS -DAIX -DB_ENDIAN -DBN_LLONG -O3
29# sign verify sign/s verify/s
30#rsa 512 bits 0.0098s 0.0009s 102.0 1170.6
31#rsa 1024 bits 0.0507s 0.0026s 19.7 387.5
32#rsa 2048 bits 0.3036s 0.0085s 3.3 117.1
33#rsa 4096 bits 2.0040s 0.0299s 0.5 33.4
34#dsa 512 bits 0.0087s 0.0106s 114.3 94.5
35#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0
36#
37# Same benchmark with this assembler code:
38#
39#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2
40#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1
41#rsa 2048 bits 0.1744s 0.0050s 5.7 201.2
42#rsa 4096 bits 1.1644s 0.0179s 0.9 55.7
43#dsa 512 bits 0.0052s 0.0062s 191.6 162.0
44#dsa 1024 bits 0.0149s 0.0180s 67.0 55.5
45#
46# Number of operations increases by almost 75%
47#
48# Here are performance numbers for 64-bit compiler
49# generated code:
50#
51# OpenSSL 0.9.6g [engine] 9 Aug 2002
52# built on: Fri Apr 18 16:59:20 EDT 2003
53# options:bn(64,64) ...
54# compiler: cc -DTHREADS -D_REENTRANT -q64 -DB_ENDIAN -O3
55# sign verify sign/s verify/s
56#rsa 512 bits 0.0028s 0.0003s 357.1 3844.4
57#rsa 1024 bits 0.0148s 0.0008s 67.5 1239.7
58#rsa 2048 bits 0.0963s 0.0028s 10.4 353.0
59#rsa 4096 bits 0.6538s 0.0102s 1.5 98.1
60#dsa 512 bits 0.0026s 0.0032s 382.5 313.7
61#dsa 1024 bits 0.0081s 0.0099s 122.8 100.6
62#
63# Same benchmark with this assembler code:
64#
65#rsa 512 bits 0.0020s 0.0002s 510.4 6273.7
66#rsa 1024 bits 0.0088s 0.0005s 114.1 2128.3
67#rsa 2048 bits 0.0540s 0.0016s 18.5 622.5
68#rsa 4096 bits 0.3700s 0.0058s 2.7 171.0
69#dsa 512 bits 0.0016s 0.0020s 610.7 507.1
70#dsa 1024 bits 0.0047s 0.0058s 212.5 173.2
71#
72# Again, performance increases by about 75%
73#
74# Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code)
75# OpenSSL 0.9.7c 30 Sep 2003
76#
77# Original code.
78#
79#rsa 512 bits 0.0011s 0.0001s 906.1 11012.5
80#rsa 1024 bits 0.0060s 0.0003s 166.6 3363.1
81#rsa 2048 bits 0.0370s 0.0010s 27.1 982.4
82#rsa 4096 bits 0.2426s 0.0036s 4.1 280.4
83#dsa 512 bits 0.0010s 0.0012s 1038.1 841.5
84#dsa 1024 bits 0.0030s 0.0037s 329.6 269.7
85#dsa 2048 bits 0.0101s 0.0127s 98.9 78.6
86#
87# Same benchmark with this assembler code:
88#
89#rsa 512 bits 0.0007s 0.0001s 1416.2 16645.9
90#rsa 1024 bits 0.0036s 0.0002s 274.4 5380.6
91#rsa 2048 bits 0.0222s 0.0006s 45.1 1589.5
92#rsa 4096 bits 0.1469s 0.0022s 6.8 449.6
93#dsa 512 bits 0.0006s 0.0007s 1664.2 1376.2
94#dsa 1024 bits 0.0018s 0.0023s 545.0 442.2
95#dsa 2048 bits 0.0061s 0.0075s 163.5 132.8
96#
97# Performance increase of ~60%
98#
99# If you have comments or suggestions to improve code send
100# me a note at schari@us.ibm.com
101#
102
# The first command-line argument names the target flavour; it selects the
# word size, the ISA string and the matching instruction mnemonics.
$flavour = shift;

if ($flavour =~ /32/) {
	$BITS	= 32;
	$ISA	= "\"ppc\"";

	($LD, $LDU)		= ("lwz", "lwzu");	# load, load and update
	($ST, $STU)		= ("stw", "stwu");	# store, store and update
	($UMULL, $UMULH)	= ("mullw", "mulhwu");	# unsigned multiply low, high
	$UDIV			= "divwu";		# unsigned divide
	($UCMPI, $UCMP)		= ("cmplwi", "cmplw");	# unsigned compare with immediate, unsigned compare
	$CNTLZ			= "cntlzw";		# count leading zeros
	($SHL, $SHR)		= ("slw", "srw");	# shift left, unsigned shift right
	($SHRI, $SHLI)		= ("srwi", "slwi");	# unsigned shift right / shift left by immediate
	$CLRU			= "clrlwi";		# clear upper bits
	$INSR			= "insrwi";		# insert right
	$ROTL			= "rotlwi";		# rotate left by immediate
	$TR			= "tw";			# conditional trap
} elsif ($flavour =~ /64/) {
	$BITS	= 64;
	$ISA	= "\"ppc64\"";

	# same as above, but 64-bit mnemonics...
	($LD, $LDU)		= ("ld", "ldu");	# load, load and update
	($ST, $STU)		= ("std", "stdu");	# store, store and update
	($UMULL, $UMULH)	= ("mulld", "mulhdu");	# unsigned multiply low, high
	$UDIV			= "divdu";		# unsigned divide
	($UCMPI, $UCMP)		= ("cmpldi", "cmpld");	# unsigned compare with immediate, unsigned compare
	$CNTLZ			= "cntlzd";		# count leading zeros
	($SHL, $SHR)		= ("sld", "srd");	# shift left, unsigned shift right
	($SHRI, $SHLI)		= ("srdi", "sldi");	# unsigned shift right / shift left by immediate
	$CLRU			= "clrldi";		# clear upper bits
	$INSR			= "insrdi";		# insert right
	$ROTL			= "rotldi";		# rotate left by immediate
	$TR			= "td";			# conditional trap
} else { die "nonsense $flavour"; }

$BNSZ = $BITS/8;	# bytes per bignum word
153
# Locate ppc-xlate.pl: first next to this script, then in ../../perlasm/
# relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe everything printed on STDOUT through the translator; the next
# command-line argument is handed to it as its last argument.
# The check must use low-precedence "or": with "||" the test applied to the
# (always true) command string, so a failed open() was never reported.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
160
161$data=<<EOF;
162#--------------------------------------------------------------------
163#
164#
165#
166#
167# File: ppc32.s
168#
169# Created by: Suresh Chari
170# IBM Thomas J. Watson Research Library
171# Hawthorne, NY
172#
173#
174# Description: Optimized assembly routines for OpenSSL crypto
175# on the 32 bitPowerPC platform.
176#
177#
178# Version History
179#
180# 2. Fixed bn_add,bn_sub and bn_div_words, added comments,
181# cleaned up code. Also made a single version which can
182# be used for both the AIX and Linux compilers. See NOTE
183# below.
184# 12/05/03 Suresh Chari
185# (with lots of help from) Andy Polyakov
186##
187# 1. Initial version 10/20/02 Suresh Chari
188#
189#
190# The following file works for the xlc,cc
191# and gcc compilers.
192#
193# NOTE: To get the file to link correctly with the gcc compiler
194# you have to change the names of the routines and remove
195# the first .(dot) character. This should automatically
196# be done in the build process.
197#
198# Hand optimized assembly code for the following routines
199#
200# bn_sqr_comba4
201# bn_sqr_comba8
202# bn_mul_comba4
203# bn_mul_comba8
204# bn_sub_words
205# bn_add_words
206# bn_div_words
207# bn_sqr_words
208# bn_mul_words
209# bn_mul_add_words
210#
211# NOTE: It is possible to optimize this code more for
212# specific PowerPC or Power architectures. On the Northstar
213# architecture the optimizations in this file do
214# NOT provide much improvement.
215#
216# If you have comments or suggestions to improve code send
217# me a note at schari\@us.ibm.com
218#
219#--------------------------------------------------------------------------
220#
221# Defines to be used in the assembly code.
222#
223#.set r0,0 # we use it as storage for value of 0
224#.set SP,1 # preserved
225#.set RTOC,2 # preserved
226#.set r3,3 # 1st argument/return value
227#.set r4,4 # 2nd argument/volatile register
228#.set r5,5 # 3rd argument/volatile register
229#.set r6,6 # ...
230#.set r7,7
231#.set r8,8
232#.set r9,9
233#.set r10,10
234#.set r11,11
235#.set r12,12
236#.set r13,13 # not used, nor any other "below" it...
237
238# Declare function names to be global
239# NOTE: For gcc these names MUST be changed to remove
240# the first . i.e. for example change ".bn_sqr_comba4"
241# to "bn_sqr_comba4". This should be automatically done
242# in the build.
243
244 .globl .bn_sqr_comba4
245 .globl .bn_sqr_comba8
246 .globl .bn_mul_comba4
247 .globl .bn_mul_comba8
248 .globl .bn_sub_words
249 .globl .bn_add_words
250 .globl .bn_div_words
251 .globl .bn_sqr_words
252 .globl .bn_mul_words
253 .globl .bn_mul_add_words
254
255# .text section
256
257 .machine "any"
258
259#
260# NOTE: The following label name should be changed to
261# "bn_sqr_comba4" i.e. remove the first dot
262# for the gcc compiler. This should be automatically
263# done in the build
264#
265
266.align 4
267.bn_sqr_comba4:
268#
269# Optimized version of bn_sqr_comba4.
270#
271# void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
272# r3 contains r
273# r4 contains a
274#
275# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
276#
277# r5,r6 are the two BN_ULONGs being multiplied.
278# r7,r8 are the results of the 32x32 giving 64 bit multiply.
279# r9,r10, r11 are the equivalents of c1,c2, c3.
280# Here's the assembly
281#
282#
283 xor r0,r0,r0 # set r0 = 0. Used in the addze
284 # instructions below
285
286 #sqr_add_c(a,0,c1,c2,c3)
287 $LD r5,`0*$BNSZ`(r4)
288 $UMULL r9,r5,r5
289 $UMULH r10,r5,r5 #in first iteration. No need
290 #to add since c1=c2=c3=0.
291 # Note c3(r11) is NOT set to 0
292 # but will be.
293
294 $ST r9,`0*$BNSZ`(r3) # r[0]=c1;
295 # sqr_add_c2(a,1,0,c2,c3,c1);
296 $LD r6,`1*$BNSZ`(r4)
297 $UMULL r7,r5,r6
298 $UMULH r8,r5,r6
299
300 addc r7,r7,r7 # compute (r7,r8)=2*(r7,r8)
301 adde r8,r8,r8
302 addze r9,r0 # catch carry if any.
303 # r9= r0(=0) and carry
304
305 addc r10,r7,r10 # now add to temp result.
306 addze r11,r8 # r8 added to r11 which is 0
307 addze r9,r9
308
309 $ST r10,`1*$BNSZ`(r3) #r[1]=c2;
310 #sqr_add_c(a,1,c3,c1,c2)
311 $UMULL r7,r6,r6
312 $UMULH r8,r6,r6
313 addc r11,r7,r11
314 adde r9,r8,r9
315 addze r10,r0
316 #sqr_add_c2(a,2,0,c3,c1,c2)
317 $LD r6,`2*$BNSZ`(r4)
318 $UMULL r7,r5,r6
319 $UMULH r8,r5,r6
320
321 addc r7,r7,r7
322 adde r8,r8,r8
323 addze r10,r10
324
325 addc r11,r7,r11
326 adde r9,r8,r9
327 addze r10,r10
328 $ST r11,`2*$BNSZ`(r3) #r[2]=c3
329 #sqr_add_c2(a,3,0,c1,c2,c3);
330 $LD r6,`3*$BNSZ`(r4)
331 $UMULL r7,r5,r6
332 $UMULH r8,r5,r6
333 addc r7,r7,r7
334 adde r8,r8,r8
335 addze r11,r0
336
337 addc r9,r7,r9
338 adde r10,r8,r10
339 addze r11,r11
340 #sqr_add_c2(a,2,1,c1,c2,c3);
341 $LD r5,`1*$BNSZ`(r4)
342 $LD r6,`2*$BNSZ`(r4)
343 $UMULL r7,r5,r6
344 $UMULH r8,r5,r6
345
346 addc r7,r7,r7
347 adde r8,r8,r8
348 addze r11,r11
349 addc r9,r7,r9
350 adde r10,r8,r10
351 addze r11,r11
352 $ST r9,`3*$BNSZ`(r3) #r[3]=c1
353 #sqr_add_c(a,2,c2,c3,c1);
354 $UMULL r7,r6,r6
355 $UMULH r8,r6,r6
356 addc r10,r7,r10
357 adde r11,r8,r11
358 addze r9,r0
359 #sqr_add_c2(a,3,1,c2,c3,c1);
360 $LD r6,`3*$BNSZ`(r4)
361 $UMULL r7,r5,r6
362 $UMULH r8,r5,r6
363 addc r7,r7,r7
364 adde r8,r8,r8
365 addze r9,r9
366
367 addc r10,r7,r10
368 adde r11,r8,r11
369 addze r9,r9
370 $ST r10,`4*$BNSZ`(r3) #r[4]=c2
371 #sqr_add_c2(a,3,2,c3,c1,c2);
372 $LD r5,`2*$BNSZ`(r4)
373 $UMULL r7,r5,r6
374 $UMULH r8,r5,r6
375 addc r7,r7,r7
376 adde r8,r8,r8
377 addze r10,r0
378
379 addc r11,r7,r11
380 adde r9,r8,r9
381 addze r10,r10
382 $ST r11,`5*$BNSZ`(r3) #r[5] = c3
383 #sqr_add_c(a,3,c1,c2,c3);
384 $UMULL r7,r6,r6
385 $UMULH r8,r6,r6
386 addc r9,r7,r9
387 adde r10,r8,r10
388
389 $ST r9,`6*$BNSZ`(r3) #r[6]=c1
390 $ST r10,`7*$BNSZ`(r3) #r[7]=c2
391 blr
392
393#
394# NOTE: The following label name should be changed to
395# "bn_sqr_comba8" i.e. remove the first dot
396# for the gcc compiler. This should be automatically
397# done in the build
398#
399
400.align 4
401.bn_sqr_comba8:
402#
403# This is an optimized version of the bn_sqr_comba8 routine.
404# Tightly uses the adde instruction
405#
406#
407# void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
408# r3 contains r
409# r4 contains a
410#
411# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
412#
413# r5,r6 are the two BN_ULONGs being multiplied.
414# r7,r8 are the results of the 32x32 giving 64 bit multiply.
415# r9,r10, r11 are the equivalents of c1,c2, c3.
416#
417# Possible optimization of loading all 8 longs of a into registers
418# doesnt provide any speedup
419#
420
421 xor r0,r0,r0 #set r0 = 0.Used in addze
422 #instructions below.
423
424 #sqr_add_c(a,0,c1,c2,c3);
425 $LD r5,`0*$BNSZ`(r4)
426 $UMULL r9,r5,r5 #1st iteration: no carries.
427 $UMULH r10,r5,r5
428 $ST r9,`0*$BNSZ`(r3) # r[0]=c1;
429 #sqr_add_c2(a,1,0,c2,c3,c1);
430 $LD r6,`1*$BNSZ`(r4)
431 $UMULL r7,r5,r6
432 $UMULH r8,r5,r6
433
434 addc r10,r7,r10 #add the two register number
435 adde r11,r8,r0 # (r8,r7) to the three register
436 addze r9,r0 # number (r9,r11,r10).NOTE:r0=0
437
438 addc r10,r7,r10 #add the two register number
439 adde r11,r8,r11 # (r8,r7) to the three register
440 addze r9,r9 # number (r9,r11,r10).
441
442 $ST r10,`1*$BNSZ`(r3) # r[1]=c2
443
444 #sqr_add_c(a,1,c3,c1,c2);
445 $UMULL r7,r6,r6
446 $UMULH r8,r6,r6
447 addc r11,r7,r11
448 adde r9,r8,r9
449 addze r10,r0
450 #sqr_add_c2(a,2,0,c3,c1,c2);
451 $LD r6,`2*$BNSZ`(r4)
452 $UMULL r7,r5,r6
453 $UMULH r8,r5,r6
454
455 addc r11,r7,r11
456 adde r9,r8,r9
457 addze r10,r10
458
459 addc r11,r7,r11
460 adde r9,r8,r9
461 addze r10,r10
462
463 $ST r11,`2*$BNSZ`(r3) #r[2]=c3
464 #sqr_add_c2(a,3,0,c1,c2,c3);
465 $LD r6,`3*$BNSZ`(r4) #r6 = a[3]. r5 is already a[0].
466 $UMULL r7,r5,r6
467 $UMULH r8,r5,r6
468
469 addc r9,r7,r9
470 adde r10,r8,r10
471 addze r11,r0
472
473 addc r9,r7,r9
474 adde r10,r8,r10
475 addze r11,r11
476 #sqr_add_c2(a,2,1,c1,c2,c3);
477 $LD r5,`1*$BNSZ`(r4)
478 $LD r6,`2*$BNSZ`(r4)
479 $UMULL r7,r5,r6
480 $UMULH r8,r5,r6
481
482 addc r9,r7,r9
483 adde r10,r8,r10
484 addze r11,r11
485
486 addc r9,r7,r9
487 adde r10,r8,r10
488 addze r11,r11
489
490 $ST r9,`3*$BNSZ`(r3) #r[3]=c1;
491 #sqr_add_c(a,2,c2,c3,c1);
492 $UMULL r7,r6,r6
493 $UMULH r8,r6,r6
494
495 addc r10,r7,r10
496 adde r11,r8,r11
497 addze r9,r0
498 #sqr_add_c2(a,3,1,c2,c3,c1);
499 $LD r6,`3*$BNSZ`(r4)
500 $UMULL r7,r5,r6
501 $UMULH r8,r5,r6
502
503 addc r10,r7,r10
504 adde r11,r8,r11
505 addze r9,r9
506
507 addc r10,r7,r10
508 adde r11,r8,r11
509 addze r9,r9
510 #sqr_add_c2(a,4,0,c2,c3,c1);
511 $LD r5,`0*$BNSZ`(r4)
512 $LD r6,`4*$BNSZ`(r4)
513 $UMULL r7,r5,r6
514 $UMULH r8,r5,r6
515
516 addc r10,r7,r10
517 adde r11,r8,r11
518 addze r9,r9
519
520 addc r10,r7,r10
521 adde r11,r8,r11
522 addze r9,r9
523 $ST r10,`4*$BNSZ`(r3) #r[4]=c2;
524 #sqr_add_c2(a,5,0,c3,c1,c2);
525 $LD r6,`5*$BNSZ`(r4)
526 $UMULL r7,r5,r6
527 $UMULH r8,r5,r6
528
529 addc r11,r7,r11
530 adde r9,r8,r9
531 addze r10,r0
532
533 addc r11,r7,r11
534 adde r9,r8,r9
535 addze r10,r10
536 #sqr_add_c2(a,4,1,c3,c1,c2);
537 $LD r5,`1*$BNSZ`(r4)
538 $LD r6,`4*$BNSZ`(r4)
539 $UMULL r7,r5,r6
540 $UMULH r8,r5,r6
541
542 addc r11,r7,r11
543 adde r9,r8,r9
544 addze r10,r10
545
546 addc r11,r7,r11
547 adde r9,r8,r9
548 addze r10,r10
549 #sqr_add_c2(a,3,2,c3,c1,c2);
550 $LD r5,`2*$BNSZ`(r4)
551 $LD r6,`3*$BNSZ`(r4)
552 $UMULL r7,r5,r6
553 $UMULH r8,r5,r6
554
555 addc r11,r7,r11
556 adde r9,r8,r9
557 addze r10,r10
558
559 addc r11,r7,r11
560 adde r9,r8,r9
561 addze r10,r10
562 $ST r11,`5*$BNSZ`(r3) #r[5]=c3;
563 #sqr_add_c(a,3,c1,c2,c3);
564 $UMULL r7,r6,r6
565 $UMULH r8,r6,r6
566 addc r9,r7,r9
567 adde r10,r8,r10
568 addze r11,r0
569 #sqr_add_c2(a,4,2,c1,c2,c3);
570 $LD r6,`4*$BNSZ`(r4)
571 $UMULL r7,r5,r6
572 $UMULH r8,r5,r6
573
574 addc r9,r7,r9
575 adde r10,r8,r10
576 addze r11,r11
577
578 addc r9,r7,r9
579 adde r10,r8,r10
580 addze r11,r11
581 #sqr_add_c2(a,5,1,c1,c2,c3);
582 $LD r5,`1*$BNSZ`(r4)
583 $LD r6,`5*$BNSZ`(r4)
584 $UMULL r7,r5,r6
585 $UMULH r8,r5,r6
586
587 addc r9,r7,r9
588 adde r10,r8,r10
589 addze r11,r11
590
591 addc r9,r7,r9
592 adde r10,r8,r10
593 addze r11,r11
594 #sqr_add_c2(a,6,0,c1,c2,c3);
595 $LD r5,`0*$BNSZ`(r4)
596 $LD r6,`6*$BNSZ`(r4)
597 $UMULL r7,r5,r6
598 $UMULH r8,r5,r6
599 addc r9,r7,r9
600 adde r10,r8,r10
601 addze r11,r11
602 addc r9,r7,r9
603 adde r10,r8,r10
604 addze r11,r11
605 $ST r9,`6*$BNSZ`(r3) #r[6]=c1;
606 #sqr_add_c2(a,7,0,c2,c3,c1);
607 $LD r6,`7*$BNSZ`(r4)
608 $UMULL r7,r5,r6
609 $UMULH r8,r5,r6
610
611 addc r10,r7,r10
612 adde r11,r8,r11
613 addze r9,r0
614 addc r10,r7,r10
615 adde r11,r8,r11
616 addze r9,r9
617 #sqr_add_c2(a,6,1,c2,c3,c1);
618 $LD r5,`1*$BNSZ`(r4)
619 $LD r6,`6*$BNSZ`(r4)
620 $UMULL r7,r5,r6
621 $UMULH r8,r5,r6
622
623 addc r10,r7,r10
624 adde r11,r8,r11
625 addze r9,r9
626 addc r10,r7,r10
627 adde r11,r8,r11
628 addze r9,r9
629 #sqr_add_c2(a,5,2,c2,c3,c1);
630 $LD r5,`2*$BNSZ`(r4)
631 $LD r6,`5*$BNSZ`(r4)
632 $UMULL r7,r5,r6
633 $UMULH r8,r5,r6
634 addc r10,r7,r10
635 adde r11,r8,r11
636 addze r9,r9
637 addc r10,r7,r10
638 adde r11,r8,r11
639 addze r9,r9
640 #sqr_add_c2(a,4,3,c2,c3,c1);
641 $LD r5,`3*$BNSZ`(r4)
642 $LD r6,`4*$BNSZ`(r4)
643 $UMULL r7,r5,r6
644 $UMULH r8,r5,r6
645
646 addc r10,r7,r10
647 adde r11,r8,r11
648 addze r9,r9
649 addc r10,r7,r10
650 adde r11,r8,r11
651 addze r9,r9
652 $ST r10,`7*$BNSZ`(r3) #r[7]=c2;
653 #sqr_add_c(a,4,c3,c1,c2);
654 $UMULL r7,r6,r6
655 $UMULH r8,r6,r6
656 addc r11,r7,r11
657 adde r9,r8,r9
658 addze r10,r0
659 #sqr_add_c2(a,5,3,c3,c1,c2);
660 $LD r6,`5*$BNSZ`(r4)
661 $UMULL r7,r5,r6
662 $UMULH r8,r5,r6
663 addc r11,r7,r11
664 adde r9,r8,r9
665 addze r10,r10
666 addc r11,r7,r11
667 adde r9,r8,r9
668 addze r10,r10
669 #sqr_add_c2(a,6,2,c3,c1,c2);
670 $LD r5,`2*$BNSZ`(r4)
671 $LD r6,`6*$BNSZ`(r4)
672 $UMULL r7,r5,r6
673 $UMULH r8,r5,r6
674 addc r11,r7,r11
675 adde r9,r8,r9
676 addze r10,r10
677
678 addc r11,r7,r11
679 adde r9,r8,r9
680 addze r10,r10
681 #sqr_add_c2(a,7,1,c3,c1,c2);
682 $LD r5,`1*$BNSZ`(r4)
683 $LD r6,`7*$BNSZ`(r4)
684 $UMULL r7,r5,r6
685 $UMULH r8,r5,r6
686 addc r11,r7,r11
687 adde r9,r8,r9
688 addze r10,r10
689 addc r11,r7,r11
690 adde r9,r8,r9
691 addze r10,r10
692 $ST r11,`8*$BNSZ`(r3) #r[8]=c3;
693 #sqr_add_c2(a,7,2,c1,c2,c3);
694 $LD r5,`2*$BNSZ`(r4)
695 $UMULL r7,r5,r6
696 $UMULH r8,r5,r6
697
698 addc r9,r7,r9
699 adde r10,r8,r10
700 addze r11,r0
701 addc r9,r7,r9
702 adde r10,r8,r10
703 addze r11,r11
704 #sqr_add_c2(a,6,3,c1,c2,c3);
705 $LD r5,`3*$BNSZ`(r4)
706 $LD r6,`6*$BNSZ`(r4)
707 $UMULL r7,r5,r6
708 $UMULH r8,r5,r6
709 addc r9,r7,r9
710 adde r10,r8,r10
711 addze r11,r11
712 addc r9,r7,r9
713 adde r10,r8,r10
714 addze r11,r11
715 #sqr_add_c2(a,5,4,c1,c2,c3);
716 $LD r5,`4*$BNSZ`(r4)
717 $LD r6,`5*$BNSZ`(r4)
718 $UMULL r7,r5,r6
719 $UMULH r8,r5,r6
720 addc r9,r7,r9
721 adde r10,r8,r10
722 addze r11,r11
723 addc r9,r7,r9
724 adde r10,r8,r10
725 addze r11,r11
726 $ST r9,`9*$BNSZ`(r3) #r[9]=c1;
727 #sqr_add_c(a,5,c2,c3,c1);
728 $UMULL r7,r6,r6
729 $UMULH r8,r6,r6
730 addc r10,r7,r10
731 adde r11,r8,r11
732 addze r9,r0
733 #sqr_add_c2(a,6,4,c2,c3,c1);
734 $LD r6,`6*$BNSZ`(r4)
735 $UMULL r7,r5,r6
736 $UMULH r8,r5,r6
737 addc r10,r7,r10
738 adde r11,r8,r11
739 addze r9,r9
740 addc r10,r7,r10
741 adde r11,r8,r11
742 addze r9,r9
743 #sqr_add_c2(a,7,3,c2,c3,c1);
744 $LD r5,`3*$BNSZ`(r4)
745 $LD r6,`7*$BNSZ`(r4)
746 $UMULL r7,r5,r6
747 $UMULH r8,r5,r6
748 addc r10,r7,r10
749 adde r11,r8,r11
750 addze r9,r9
751 addc r10,r7,r10
752 adde r11,r8,r11
753 addze r9,r9
754 $ST r10,`10*$BNSZ`(r3) #r[10]=c2;
755 #sqr_add_c2(a,7,4,c3,c1,c2);
756 $LD r5,`4*$BNSZ`(r4)
757 $UMULL r7,r5,r6
758 $UMULH r8,r5,r6
759 addc r11,r7,r11
760 adde r9,r8,r9
761 addze r10,r0
762 addc r11,r7,r11
763 adde r9,r8,r9
764 addze r10,r10
765 #sqr_add_c2(a,6,5,c3,c1,c2);
766 $LD r5,`5*$BNSZ`(r4)
767 $LD r6,`6*$BNSZ`(r4)
768 $UMULL r7,r5,r6
769 $UMULH r8,r5,r6
770 addc r11,r7,r11
771 adde r9,r8,r9
772 addze r10,r10
773 addc r11,r7,r11
774 adde r9,r8,r9
775 addze r10,r10
776 $ST r11,`11*$BNSZ`(r3) #r[11]=c3;
777 #sqr_add_c(a,6,c1,c2,c3);
778 $UMULL r7,r6,r6
779 $UMULH r8,r6,r6
780 addc r9,r7,r9
781 adde r10,r8,r10
782 addze r11,r0
783 #sqr_add_c2(a,7,5,c1,c2,c3)
784 $LD r6,`7*$BNSZ`(r4)
785 $UMULL r7,r5,r6
786 $UMULH r8,r5,r6
787 addc r9,r7,r9
788 adde r10,r8,r10
789 addze r11,r11
790 addc r9,r7,r9
791 adde r10,r8,r10
792 addze r11,r11
793 $ST r9,`12*$BNSZ`(r3) #r[12]=c1;
794
795 #sqr_add_c2(a,7,6,c2,c3,c1)
796 $LD r5,`6*$BNSZ`(r4)
797 $UMULL r7,r5,r6
798 $UMULH r8,r5,r6
799 addc r10,r7,r10
800 adde r11,r8,r11
801 addze r9,r0
802 addc r10,r7,r10
803 adde r11,r8,r11
804 addze r9,r9
805 $ST r10,`13*$BNSZ`(r3) #r[13]=c2;
806 #sqr_add_c(a,7,c3,c1,c2);
807 $UMULL r7,r6,r6
808 $UMULH r8,r6,r6
809 addc r11,r7,r11
810 adde r9,r8,r9
811 $ST r11,`14*$BNSZ`(r3) #r[14]=c3;
812 $ST r9, `15*$BNSZ`(r3) #r[15]=c1;
813
814
815 blr
816
817#
818# NOTE: The following label name should be changed to
819# "bn_mul_comba4" i.e. remove the first dot
820# for the gcc compiler. This should be automatically
821# done in the build
822#
823
824.align 4
825.bn_mul_comba4:
826#
827# This is an optimized version of the bn_mul_comba4 routine.
828#
829# void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
830# r3 contains r
831# r4 contains a
832# r5 contains b
833# r6, r7 are the 2 BN_ULONGs being multiplied.
834# r8, r9 are the results of the 32x32 giving 64 multiply.
835# r10, r11, r12 are the equivalents of c1, c2, and c3.
836#
837 xor r0,r0,r0 #r0=0. Used in addze below.
838 #mul_add_c(a[0],b[0],c1,c2,c3);
839 $LD r6,`0*$BNSZ`(r4)
840 $LD r7,`0*$BNSZ`(r5)
841 $UMULL r10,r6,r7
842 $UMULH r11,r6,r7
843 $ST r10,`0*$BNSZ`(r3) #r[0]=c1
844 #mul_add_c(a[0],b[1],c2,c3,c1);
845 $LD r7,`1*$BNSZ`(r5)
846 $UMULL r8,r6,r7
847 $UMULH r9,r6,r7
848 addc r11,r8,r11
849 adde r12,r9,r0
850 addze r10,r0
851 #mul_add_c(a[1],b[0],c2,c3,c1);
852 $LD r6, `1*$BNSZ`(r4)
853 $LD r7, `0*$BNSZ`(r5)
854 $UMULL r8,r6,r7
855 $UMULH r9,r6,r7
856 addc r11,r8,r11
857 adde r12,r9,r12
858 addze r10,r10
859 $ST r11,`1*$BNSZ`(r3) #r[1]=c2
860 #mul_add_c(a[2],b[0],c3,c1,c2);
861 $LD r6,`2*$BNSZ`(r4)
862 $UMULL r8,r6,r7
863 $UMULH r9,r6,r7
864 addc r12,r8,r12
865 adde r10,r9,r10
866 addze r11,r0
867 #mul_add_c(a[1],b[1],c3,c1,c2);
868 $LD r6,`1*$BNSZ`(r4)
869 $LD r7,`1*$BNSZ`(r5)
870 $UMULL r8,r6,r7
871 $UMULH r9,r6,r7
872 addc r12,r8,r12
873 adde r10,r9,r10
874 addze r11,r11
875 #mul_add_c(a[0],b[2],c3,c1,c2);
876 $LD r6,`0*$BNSZ`(r4)
877 $LD r7,`2*$BNSZ`(r5)
878 $UMULL r8,r6,r7
879 $UMULH r9,r6,r7
880 addc r12,r8,r12
881 adde r10,r9,r10
882 addze r11,r11
883 $ST r12,`2*$BNSZ`(r3) #r[2]=c3
884 #mul_add_c(a[0],b[3],c1,c2,c3);
885 $LD r7,`3*$BNSZ`(r5)
886 $UMULL r8,r6,r7
887 $UMULH r9,r6,r7
888 addc r10,r8,r10
889 adde r11,r9,r11
890 addze r12,r0
891 #mul_add_c(a[1],b[2],c1,c2,c3);
892 $LD r6,`1*$BNSZ`(r4)
893 $LD r7,`2*$BNSZ`(r5)
894 $UMULL r8,r6,r7
895 $UMULH r9,r6,r7
896 addc r10,r8,r10
897 adde r11,r9,r11
898 addze r12,r12
899 #mul_add_c(a[2],b[1],c1,c2,c3);
900 $LD r6,`2*$BNSZ`(r4)
901 $LD r7,`1*$BNSZ`(r5)
902 $UMULL r8,r6,r7
903 $UMULH r9,r6,r7
904 addc r10,r8,r10
905 adde r11,r9,r11
906 addze r12,r12
907 #mul_add_c(a[3],b[0],c1,c2,c3);
908 $LD r6,`3*$BNSZ`(r4)
909 $LD r7,`0*$BNSZ`(r5)
910 $UMULL r8,r6,r7
911 $UMULH r9,r6,r7
912 addc r10,r8,r10
913 adde r11,r9,r11
914 addze r12,r12
915 $ST r10,`3*$BNSZ`(r3) #r[3]=c1
916 #mul_add_c(a[3],b[1],c2,c3,c1);
917 $LD r7,`1*$BNSZ`(r5)
918 $UMULL r8,r6,r7
919 $UMULH r9,r6,r7
920 addc r11,r8,r11
921 adde r12,r9,r12
922 addze r10,r0
923 #mul_add_c(a[2],b[2],c2,c3,c1);
924 $LD r6,`2*$BNSZ`(r4)
925 $LD r7,`2*$BNSZ`(r5)
926 $UMULL r8,r6,r7
927 $UMULH r9,r6,r7
928 addc r11,r8,r11
929 adde r12,r9,r12
930 addze r10,r10
931 #mul_add_c(a[1],b[3],c2,c3,c1);
932 $LD r6,`1*$BNSZ`(r4)
933 $LD r7,`3*$BNSZ`(r5)
934 $UMULL r8,r6,r7
935 $UMULH r9,r6,r7
936 addc r11,r8,r11
937 adde r12,r9,r12
938 addze r10,r10
939 $ST r11,`4*$BNSZ`(r3) #r[4]=c2
940 #mul_add_c(a[2],b[3],c3,c1,c2);
941 $LD r6,`2*$BNSZ`(r4)
942 $UMULL r8,r6,r7
943 $UMULH r9,r6,r7
944 addc r12,r8,r12
945 adde r10,r9,r10
946 addze r11,r0
947 #mul_add_c(a[3],b[2],c3,c1,c2);
948 $LD r6,`3*$BNSZ`(r4)
949 $LD r7,`2*$BNSZ`(r5)
950 $UMULL r8,r6,r7
951 $UMULH r9,r6,r7
952 addc r12,r8,r12
953 adde r10,r9,r10
954 addze r11,r11
955 $ST r12,`5*$BNSZ`(r3) #r[5]=c3
956 #mul_add_c(a[3],b[3],c1,c2,c3);
957 $LD r7,`3*$BNSZ`(r5)
958 $UMULL r8,r6,r7
959 $UMULH r9,r6,r7
960 addc r10,r8,r10
961 adde r11,r9,r11
962
963 $ST r10,`6*$BNSZ`(r3) #r[6]=c1
964 $ST r11,`7*$BNSZ`(r3) #r[7]=c2
965 blr
966
967#
968# NOTE: The following label name should be changed to
969# "bn_mul_comba8" i.e. remove the first dot
970# for the gcc compiler. This should be automatically
971# done in the build
972#
973
974.align 4
975.bn_mul_comba8:
976#
977# Optimized version of the bn_mul_comba8 routine.
978# Computes the 16-word product r[0..15] = a[0..7] * b[0..7], one result column at a time.
979# void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
980# r3 contains r
981# r4 contains a
982# r5 contains b
983# r6, r7 are the 2 BN_ULONGs being multiplied.
984# r8, r9 are the low ($UMULL) and high ($UMULH) words of the product r6*r7.
985# r10, r11, r12 are the equivalents of c1, c2, and c3.
986# The roles of c1, c2, c3 rotate after each result word is stored.
987 xor r0,r0,r0 #r0=0. Used in addze below.
988
989 #mul_add_c(a[0],b[0],c1,c2,c3);
990 $LD r6,`0*$BNSZ`(r4) #a[0]
991 $LD r7,`0*$BNSZ`(r5) #b[0]
992 $UMULL r10,r6,r7
993 $UMULH r11,r6,r7
994 $ST r10,`0*$BNSZ`(r3) #r[0]=c1;
995 #mul_add_c(a[0],b[1],c2,c3,c1);
996 $LD r7,`1*$BNSZ`(r5)
997 $UMULL r8,r6,r7
998 $UMULH r9,r6,r7
999 addc r11,r11,r8
1000 addze r12,r9 # r12 = r9 + carry; r12 was not zeroed before, so it is set (not accumulated) here.
1001 addze r10,r0
1002 #mul_add_c(a[1],b[0],c2,c3,c1);
1003 $LD r6,`1*$BNSZ`(r4)
1004 $LD r7,`0*$BNSZ`(r5)
1005 $UMULL r8,r6,r7
1006 $UMULH r9,r6,r7
1007 addc r11,r11,r8
1008 adde r12,r12,r9
1009 addze r10,r10
1010 $ST r11,`1*$BNSZ`(r3) #r[1]=c2;
1011 #mul_add_c(a[2],b[0],c3,c1,c2);
1012 $LD r6,`2*$BNSZ`(r4)
1013 $UMULL r8,r6,r7
1014 $UMULH r9,r6,r7
1015 addc r12,r12,r8
1016 adde r10,r10,r9
1017 addze r11,r0
1018 #mul_add_c(a[1],b[1],c3,c1,c2);
1019 $LD r6,`1*$BNSZ`(r4)
1020 $LD r7,`1*$BNSZ`(r5)
1021 $UMULL r8,r6,r7
1022 $UMULH r9,r6,r7
1023 addc r12,r12,r8
1024 adde r10,r10,r9
1025 addze r11,r11
1026 #mul_add_c(a[0],b[2],c3,c1,c2);
1027 $LD r6,`0*$BNSZ`(r4)
1028 $LD r7,`2*$BNSZ`(r5)
1029 $UMULL r8,r6,r7
1030 $UMULH r9,r6,r7
1031 addc r12,r12,r8
1032 adde r10,r10,r9
1033 addze r11,r11
1034 $ST r12,`2*$BNSZ`(r3) #r[2]=c3;
1035 #mul_add_c(a[0],b[3],c1,c2,c3);
1036 $LD r7,`3*$BNSZ`(r5)
1037 $UMULL r8,r6,r7
1038 $UMULH r9,r6,r7
1039 addc r10,r10,r8
1040 adde r11,r11,r9
1041 addze r12,r0
1042 #mul_add_c(a[1],b[2],c1,c2,c3);
1043 $LD r6,`1*$BNSZ`(r4)
1044 $LD r7,`2*$BNSZ`(r5)
1045 $UMULL r8,r6,r7
1046 $UMULH r9,r6,r7
1047 addc r10,r10,r8
1048 adde r11,r11,r9
1049 addze r12,r12
1050
1051 #mul_add_c(a[2],b[1],c1,c2,c3);
1052 $LD r6,`2*$BNSZ`(r4)
1053 $LD r7,`1*$BNSZ`(r5)
1054 $UMULL r8,r6,r7
1055 $UMULH r9,r6,r7
1056 addc r10,r10,r8
1057 adde r11,r11,r9
1058 addze r12,r12
1059 #mul_add_c(a[3],b[0],c1,c2,c3);
1060 $LD r6,`3*$BNSZ`(r4)
1061 $LD r7,`0*$BNSZ`(r5)
1062 $UMULL r8,r6,r7
1063 $UMULH r9,r6,r7
1064 addc r10,r10,r8
1065 adde r11,r11,r9
1066 addze r12,r12
1067 $ST r10,`3*$BNSZ`(r3) #r[3]=c1;
1068 #mul_add_c(a[4],b[0],c2,c3,c1);
1069 $LD r6,`4*$BNSZ`(r4)
1070 $UMULL r8,r6,r7
1071 $UMULH r9,r6,r7
1072 addc r11,r11,r8
1073 adde r12,r12,r9
1074 addze r10,r0
1075 #mul_add_c(a[3],b[1],c2,c3,c1);
1076 $LD r6,`3*$BNSZ`(r4)
1077 $LD r7,`1*$BNSZ`(r5)
1078 $UMULL r8,r6,r7
1079 $UMULH r9,r6,r7
1080 addc r11,r11,r8
1081 adde r12,r12,r9
1082 addze r10,r10
1083 #mul_add_c(a[2],b[2],c2,c3,c1);
1084 $LD r6,`2*$BNSZ`(r4)
1085 $LD r7,`2*$BNSZ`(r5)
1086 $UMULL r8,r6,r7
1087 $UMULH r9,r6,r7
1088 addc r11,r11,r8
1089 adde r12,r12,r9
1090 addze r10,r10
1091 #mul_add_c(a[1],b[3],c2,c3,c1);
1092 $LD r6,`1*$BNSZ`(r4)
1093 $LD r7,`3*$BNSZ`(r5)
1094 $UMULL r8,r6,r7
1095 $UMULH r9,r6,r7
1096 addc r11,r11,r8
1097 adde r12,r12,r9
1098 addze r10,r10
1099 #mul_add_c(a[0],b[4],c2,c3,c1);
1100 $LD r6,`0*$BNSZ`(r4)
1101 $LD r7,`4*$BNSZ`(r5)
1102 $UMULL r8,r6,r7
1103 $UMULH r9,r6,r7
1104 addc r11,r11,r8
1105 adde r12,r12,r9
1106 addze r10,r10
1107 $ST r11,`4*$BNSZ`(r3) #r[4]=c2;
1108 #mul_add_c(a[0],b[5],c3,c1,c2);
1109 $LD r7,`5*$BNSZ`(r5)
1110 $UMULL r8,r6,r7
1111 $UMULH r9,r6,r7
1112 addc r12,r12,r8
1113 adde r10,r10,r9
1114 addze r11,r0
1115 #mul_add_c(a[1],b[4],c3,c1,c2);
1116 $LD r6,`1*$BNSZ`(r4)
1117 $LD r7,`4*$BNSZ`(r5)
1118 $UMULL r8,r6,r7
1119 $UMULH r9,r6,r7
1120 addc r12,r12,r8
1121 adde r10,r10,r9
1122 addze r11,r11
1123 #mul_add_c(a[2],b[3],c3,c1,c2);
1124 $LD r6,`2*$BNSZ`(r4)
1125 $LD r7,`3*$BNSZ`(r5)
1126 $UMULL r8,r6,r7
1127 $UMULH r9,r6,r7
1128 addc r12,r12,r8
1129 adde r10,r10,r9
1130 addze r11,r11
1131 #mul_add_c(a[3],b[2],c3,c1,c2);
1132 $LD r6,`3*$BNSZ`(r4)
1133 $LD r7,`2*$BNSZ`(r5)
1134 $UMULL r8,r6,r7
1135 $UMULH r9,r6,r7
1136 addc r12,r12,r8
1137 adde r10,r10,r9
1138 addze r11,r11
1139 #mul_add_c(a[4],b[1],c3,c1,c2);
1140 $LD r6,`4*$BNSZ`(r4)
1141 $LD r7,`1*$BNSZ`(r5)
1142 $UMULL r8,r6,r7
1143 $UMULH r9,r6,r7
1144 addc r12,r12,r8
1145 adde r10,r10,r9
1146 addze r11,r11
1147 #mul_add_c(a[5],b[0],c3,c1,c2);
1148 $LD r6,`5*$BNSZ`(r4)
1149 $LD r7,`0*$BNSZ`(r5)
1150 $UMULL r8,r6,r7
1151 $UMULH r9,r6,r7
1152 addc r12,r12,r8
1153 adde r10,r10,r9
1154 addze r11,r11
1155 $ST r12,`5*$BNSZ`(r3) #r[5]=c3;
1156 #mul_add_c(a[6],b[0],c1,c2,c3);
1157 $LD r6,`6*$BNSZ`(r4)
1158 $UMULL r8,r6,r7
1159 $UMULH r9,r6,r7
1160 addc r10,r10,r8
1161 adde r11,r11,r9
1162 addze r12,r0
1163 #mul_add_c(a[5],b[1],c1,c2,c3);
1164 $LD r6,`5*$BNSZ`(r4)
1165 $LD r7,`1*$BNSZ`(r5)
1166 $UMULL r8,r6,r7
1167 $UMULH r9,r6,r7
1168 addc r10,r10,r8
1169 adde r11,r11,r9
1170 addze r12,r12
1171 #mul_add_c(a[4],b[2],c1,c2,c3);
1172 $LD r6,`4*$BNSZ`(r4)
1173 $LD r7,`2*$BNSZ`(r5)
1174 $UMULL r8,r6,r7
1175 $UMULH r9,r6,r7
1176 addc r10,r10,r8
1177 adde r11,r11,r9
1178 addze r12,r12
1179 #mul_add_c(a[3],b[3],c1,c2,c3);
1180 $LD r6,`3*$BNSZ`(r4)
1181 $LD r7,`3*$BNSZ`(r5)
1182 $UMULL r8,r6,r7
1183 $UMULH r9,r6,r7
1184 addc r10,r10,r8
1185 adde r11,r11,r9
1186 addze r12,r12
1187 #mul_add_c(a[2],b[4],c1,c2,c3);
1188 $LD r6,`2*$BNSZ`(r4)
1189 $LD r7,`4*$BNSZ`(r5)
1190 $UMULL r8,r6,r7
1191 $UMULH r9,r6,r7
1192 addc r10,r10,r8
1193 adde r11,r11,r9
1194 addze r12,r12
1195 #mul_add_c(a[1],b[5],c1,c2,c3);
1196 $LD r6,`1*$BNSZ`(r4)
1197 $LD r7,`5*$BNSZ`(r5)
1198 $UMULL r8,r6,r7
1199 $UMULH r9,r6,r7
1200 addc r10,r10,r8
1201 adde r11,r11,r9
1202 addze r12,r12
1203 #mul_add_c(a[0],b[6],c1,c2,c3);
1204 $LD r6,`0*$BNSZ`(r4)
1205 $LD r7,`6*$BNSZ`(r5)
1206 $UMULL r8,r6,r7
1207 $UMULH r9,r6,r7
1208 addc r10,r10,r8
1209 adde r11,r11,r9
1210 addze r12,r12
1211 $ST r10,`6*$BNSZ`(r3) #r[6]=c1;
1212 #mul_add_c(a[0],b[7],c2,c3,c1);
1213 $LD r7,`7*$BNSZ`(r5)
1214 $UMULL r8,r6,r7
1215 $UMULH r9,r6,r7
1216 addc r11,r11,r8
1217 adde r12,r12,r9
1218 addze r10,r0
1219 #mul_add_c(a[1],b[6],c2,c3,c1);
1220 $LD r6,`1*$BNSZ`(r4)
1221 $LD r7,`6*$BNSZ`(r5)
1222 $UMULL r8,r6,r7
1223 $UMULH r9,r6,r7
1224 addc r11,r11,r8
1225 adde r12,r12,r9
1226 addze r10,r10
1227 #mul_add_c(a[2],b[5],c2,c3,c1);
1228 $LD r6,`2*$BNSZ`(r4)
1229 $LD r7,`5*$BNSZ`(r5)
1230 $UMULL r8,r6,r7
1231 $UMULH r9,r6,r7
1232 addc r11,r11,r8
1233 adde r12,r12,r9
1234 addze r10,r10
1235 #mul_add_c(a[3],b[4],c2,c3,c1);
1236 $LD r6,`3*$BNSZ`(r4)
1237 $LD r7,`4*$BNSZ`(r5)
1238 $UMULL r8,r6,r7
1239 $UMULH r9,r6,r7
1240 addc r11,r11,r8
1241 adde r12,r12,r9
1242 addze r10,r10
1243 #mul_add_c(a[4],b[3],c2,c3,c1);
1244 $LD r6,`4*$BNSZ`(r4)
1245 $LD r7,`3*$BNSZ`(r5)
1246 $UMULL r8,r6,r7
1247 $UMULH r9,r6,r7
1248 addc r11,r11,r8
1249 adde r12,r12,r9
1250 addze r10,r10
1251 #mul_add_c(a[5],b[2],c2,c3,c1);
1252 $LD r6,`5*$BNSZ`(r4)
1253 $LD r7,`2*$BNSZ`(r5)
1254 $UMULL r8,r6,r7
1255 $UMULH r9,r6,r7
1256 addc r11,r11,r8
1257 adde r12,r12,r9
1258 addze r10,r10
1259 #mul_add_c(a[6],b[1],c2,c3,c1);
1260 $LD r6,`6*$BNSZ`(r4)
1261 $LD r7,`1*$BNSZ`(r5)
1262 $UMULL r8,r6,r7
1263 $UMULH r9,r6,r7
1264 addc r11,r11,r8
1265 adde r12,r12,r9
1266 addze r10,r10
1267 #mul_add_c(a[7],b[0],c2,c3,c1);
1268 $LD r6,`7*$BNSZ`(r4)
1269 $LD r7,`0*$BNSZ`(r5)
1270 $UMULL r8,r6,r7
1271 $UMULH r9,r6,r7
1272 addc r11,r11,r8
1273 adde r12,r12,r9
1274 addze r10,r10
1275 $ST r11,`7*$BNSZ`(r3) #r[7]=c2;
1276 #mul_add_c(a[7],b[1],c3,c1,c2);
1277 $LD r7,`1*$BNSZ`(r5)
1278 $UMULL r8,r6,r7
1279 $UMULH r9,r6,r7
1280 addc r12,r12,r8
1281 adde r10,r10,r9
1282 addze r11,r0
1283 #mul_add_c(a[6],b[2],c3,c1,c2);
1284 $LD r6,`6*$BNSZ`(r4)
1285 $LD r7,`2*$BNSZ`(r5)
1286 $UMULL r8,r6,r7
1287 $UMULH r9,r6,r7
1288 addc r12,r12,r8
1289 adde r10,r10,r9
1290 addze r11,r11
1291 #mul_add_c(a[5],b[3],c3,c1,c2);
1292 $LD r6,`5*$BNSZ`(r4)
1293 $LD r7,`3*$BNSZ`(r5)
1294 $UMULL r8,r6,r7
1295 $UMULH r9,r6,r7
1296 addc r12,r12,r8
1297 adde r10,r10,r9
1298 addze r11,r11
1299 #mul_add_c(a[4],b[4],c3,c1,c2);
1300 $LD r6,`4*$BNSZ`(r4)
1301 $LD r7,`4*$BNSZ`(r5)
1302 $UMULL r8,r6,r7
1303 $UMULH r9,r6,r7
1304 addc r12,r12,r8
1305 adde r10,r10,r9
1306 addze r11,r11
1307 #mul_add_c(a[3],b[5],c3,c1,c2);
1308 $LD r6,`3*$BNSZ`(r4)
1309 $LD r7,`5*$BNSZ`(r5)
1310 $UMULL r8,r6,r7
1311 $UMULH r9,r6,r7
1312 addc r12,r12,r8
1313 adde r10,r10,r9
1314 addze r11,r11
1315 #mul_add_c(a[2],b[6],c3,c1,c2);
1316 $LD r6,`2*$BNSZ`(r4)
1317 $LD r7,`6*$BNSZ`(r5)
1318 $UMULL r8,r6,r7
1319 $UMULH r9,r6,r7
1320 addc r12,r12,r8
1321 adde r10,r10,r9
1322 addze r11,r11
1323 #mul_add_c(a[1],b[7],c3,c1,c2);
1324 $LD r6,`1*$BNSZ`(r4)
1325 $LD r7,`7*$BNSZ`(r5)
1326 $UMULL r8,r6,r7
1327 $UMULH r9,r6,r7
1328 addc r12,r12,r8
1329 adde r10,r10,r9
1330 addze r11,r11
1331 $ST r12,`8*$BNSZ`(r3) #r[8]=c3;
1332 #mul_add_c(a[2],b[7],c1,c2,c3);
1333 $LD r6,`2*$BNSZ`(r4)
1334 $UMULL r8,r6,r7
1335 $UMULH r9,r6,r7
1336 addc r10,r10,r8
1337 adde r11,r11,r9
1338 addze r12,r0
1339 #mul_add_c(a[3],b[6],c1,c2,c3);
1340 $LD r6,`3*$BNSZ`(r4)
1341 $LD r7,`6*$BNSZ`(r5)
1342 $UMULL r8,r6,r7
1343 $UMULH r9,r6,r7
1344 addc r10,r10,r8
1345 adde r11,r11,r9
1346 addze r12,r12
1347 #mul_add_c(a[4],b[5],c1,c2,c3);
1348 $LD r6,`4*$BNSZ`(r4)
1349 $LD r7,`5*$BNSZ`(r5)
1350 $UMULL r8,r6,r7
1351 $UMULH r9,r6,r7
1352 addc r10,r10,r8
1353 adde r11,r11,r9
1354 addze r12,r12
1355 #mul_add_c(a[5],b[4],c1,c2,c3);
1356 $LD r6,`5*$BNSZ`(r4)
1357 $LD r7,`4*$BNSZ`(r5)
1358 $UMULL r8,r6,r7
1359 $UMULH r9,r6,r7
1360 addc r10,r10,r8
1361 adde r11,r11,r9
1362 addze r12,r12
1363 #mul_add_c(a[6],b[3],c1,c2,c3);
1364 $LD r6,`6*$BNSZ`(r4)
1365 $LD r7,`3*$BNSZ`(r5)
1366 $UMULL r8,r6,r7
1367 $UMULH r9,r6,r7
1368 addc r10,r10,r8
1369 adde r11,r11,r9
1370 addze r12,r12
1371 #mul_add_c(a[7],b[2],c1,c2,c3);
1372 $LD r6,`7*$BNSZ`(r4)
1373 $LD r7,`2*$BNSZ`(r5)
1374 $UMULL r8,r6,r7
1375 $UMULH r9,r6,r7
1376 addc r10,r10,r8
1377 adde r11,r11,r9
1378 addze r12,r12
1379 $ST r10,`9*$BNSZ`(r3) #r[9]=c1;
1380 #mul_add_c(a[7],b[3],c2,c3,c1);
1381 $LD r7,`3*$BNSZ`(r5)
1382 $UMULL r8,r6,r7
1383 $UMULH r9,r6,r7
1384 addc r11,r11,r8
1385 adde r12,r12,r9
1386 addze r10,r0
1387 #mul_add_c(a[6],b[4],c2,c3,c1);
1388 $LD r6,`6*$BNSZ`(r4)
1389 $LD r7,`4*$BNSZ`(r5)
1390 $UMULL r8,r6,r7
1391 $UMULH r9,r6,r7
1392 addc r11,r11,r8
1393 adde r12,r12,r9
1394 addze r10,r10
1395 #mul_add_c(a[5],b[5],c2,c3,c1);
1396 $LD r6,`5*$BNSZ`(r4)
1397 $LD r7,`5*$BNSZ`(r5)
1398 $UMULL r8,r6,r7
1399 $UMULH r9,r6,r7
1400 addc r11,r11,r8
1401 adde r12,r12,r9
1402 addze r10,r10
1403 #mul_add_c(a[4],b[6],c2,c3,c1);
1404 $LD r6,`4*$BNSZ`(r4)
1405 $LD r7,`6*$BNSZ`(r5)
1406 $UMULL r8,r6,r7
1407 $UMULH r9,r6,r7
1408 addc r11,r11,r8
1409 adde r12,r12,r9
1410 addze r10,r10
1411 #mul_add_c(a[3],b[7],c2,c3,c1);
1412 $LD r6,`3*$BNSZ`(r4)
1413 $LD r7,`7*$BNSZ`(r5)
1414 $UMULL r8,r6,r7
1415 $UMULH r9,r6,r7
1416 addc r11,r11,r8
1417 adde r12,r12,r9
1418 addze r10,r10
1419 $ST r11,`10*$BNSZ`(r3) #r[10]=c2;
1420 #mul_add_c(a[4],b[7],c3,c1,c2);
1421 $LD r6,`4*$BNSZ`(r4)
1422 $UMULL r8,r6,r7
1423 $UMULH r9,r6,r7
1424 addc r12,r12,r8
1425 adde r10,r10,r9
1426 addze r11,r0
1427 #mul_add_c(a[5],b[6],c3,c1,c2);
1428 $LD r6,`5*$BNSZ`(r4)
1429 $LD r7,`6*$BNSZ`(r5)
1430 $UMULL r8,r6,r7
1431 $UMULH r9,r6,r7
1432 addc r12,r12,r8
1433 adde r10,r10,r9
1434 addze r11,r11
1435 #mul_add_c(a[6],b[5],c3,c1,c2);
1436 $LD r6,`6*$BNSZ`(r4)
1437 $LD r7,`5*$BNSZ`(r5)
1438 $UMULL r8,r6,r7
1439 $UMULH r9,r6,r7
1440 addc r12,r12,r8
1441 adde r10,r10,r9
1442 addze r11,r11
1443 #mul_add_c(a[7],b[4],c3,c1,c2);
1444 $LD r6,`7*$BNSZ`(r4)
1445 $LD r7,`4*$BNSZ`(r5)
1446 $UMULL r8,r6,r7
1447 $UMULH r9,r6,r7
1448 addc r12,r12,r8
1449 adde r10,r10,r9
1450 addze r11,r11
1451 $ST r12,`11*$BNSZ`(r3) #r[11]=c3;
1452 #mul_add_c(a[7],b[5],c1,c2,c3);
1453 $LD r7,`5*$BNSZ`(r5)
1454 $UMULL r8,r6,r7
1455 $UMULH r9,r6,r7
1456 addc r10,r10,r8
1457 adde r11,r11,r9
1458 addze r12,r0
1459 #mul_add_c(a[6],b[6],c1,c2,c3);
1460 $LD r6,`6*$BNSZ`(r4)
1461 $LD r7,`6*$BNSZ`(r5)
1462 $UMULL r8,r6,r7
1463 $UMULH r9,r6,r7
1464 addc r10,r10,r8
1465 adde r11,r11,r9
1466 addze r12,r12
1467 #mul_add_c(a[5],b[7],c1,c2,c3);
1468 $LD r6,`5*$BNSZ`(r4)
1469 $LD r7,`7*$BNSZ`(r5)
1470 $UMULL r8,r6,r7
1471 $UMULH r9,r6,r7
1472 addc r10,r10,r8
1473 adde r11,r11,r9
1474 addze r12,r12
1475 $ST r10,`12*$BNSZ`(r3) #r[12]=c1;
1476 #mul_add_c(a[6],b[7],c2,c3,c1);
1477 $LD r6,`6*$BNSZ`(r4)
1478 $UMULL r8,r6,r7
1479 $UMULH r9,r6,r7
1480 addc r11,r11,r8
1481 adde r12,r12,r9
1482 addze r10,r0
1483 #mul_add_c(a[7],b[6],c2,c3,c1);
1484 $LD r6,`7*$BNSZ`(r4)
1485 $LD r7,`6*$BNSZ`(r5)
1486 $UMULL r8,r6,r7
1487 $UMULH r9,r6,r7
1488 addc r11,r11,r8
1489 adde r12,r12,r9
1490 addze r10,r10
1491 $ST r11,`13*$BNSZ`(r3) #r[13]=c2;
1492 #mul_add_c(a[7],b[7],c3,c1,c2);
1493 $LD r7,`7*$BNSZ`(r5)
1494 $UMULL r8,r6,r7
1495 $UMULH r9,r6,r7
1496 addc r12,r12,r8
1497 adde r10,r10,r9 # top word: no carry-out is collected after this add.
1498 $ST r12,`14*$BNSZ`(r3) #r[14]=c3;
1499 $ST r10,`15*$BNSZ`(r3) #r[15]=c1;
1500 blr
1501
1502#
1503# NOTE: The following label name should be changed to
1504# "bn_sub_words" i.e. remove the first dot
1505# for the gcc compiler. This should be automatically
1506# done in the build
1507#
1508#
1509.align 4
1510.bn_sub_words:
1511#
1512# Handcoded version of bn_sub_words
1513#
1514#BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1515#
1516# r3 = r
1517# r4 = a
1518# r5 = b
1519# r6 = n
1520# Stores r[i] = a[i] - b[i] - borrow for the n words; returns the final borrow (0 or 1) in r3.
1521# Note: No loop unrolling done since this is not a performance
1522# critical loop.
1523
1524 xor r0,r0,r0 #set r0 = 0
1525#
1526# check for r6 = 0 AND set carry bit.
1527#
1528 subfc. r7,r0,r6 # If r6 is 0 then result is 0.
1529 # if r6 > 0 then result !=0
1530 # In either case carry bit is set.
1531 beq Lppcasm_sub_adios
1532 addi r4,r4,-$BNSZ # start each pointer one word early; the $LDU/$STU accesses in the loop step it by $BNSZ.
1533 addi r3,r3,-$BNSZ
1534 addi r5,r5,-$BNSZ
1535 mtctr r6
1536Lppcasm_sub_mainloop:
1537 $LDU r7,$BNSZ(r4)
1538 $LDU r8,$BNSZ(r5)
1539 subfe r6,r8,r7 # r6 = r7+carry bit + onescomplement(r8)
1540 # if carry = 1 this is r7-r8. Else it
1541 # is r7-r8 -1 as we need.
1542 $STU r6,$BNSZ(r3)
1543 bdnz- Lppcasm_sub_mainloop
1544Lppcasm_sub_adios:
1545 subfze r3,r0 # if carry bit is set then r3 = 0 else -1
1546 andi. r3,r3,1 # keep only last bit: r3 = 1 if the carry bit was clear (borrow), else 0.
1547 blr
1548
1549#
1550# NOTE: The following label name should be changed to
1551# "bn_add_words" i.e. remove the first dot
1552# for the gcc compiler. This should be automatically
1553# done in the build
1554#
1555
1556.align 4
1557.bn_add_words:
1558#
1559# Handcoded version of bn_add_words
1560#
1561#BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1562#
1563# r3 = r
1564# r4 = a
1565# r5 = b
1566# r6 = n
1567# Stores r[i] = a[i] + b[i] + carry for the n words; returns the final carry bit in r3.
1568# Note: No loop unrolling done since this is not a performance
1569# critical loop.
1570
1571 xor r0,r0,r0
1572#
1573# check for r6 = 0. Needed: the loop below runs its body once before it tests the counter.
1574#
1575 addic. r6,r6,0 #test r6 and clear carry bit.
1576 beq Lppcasm_add_adios
1577 addi r4,r4,-$BNSZ # start each pointer one word early; the $LDU/$STU accesses in the loop step it by $BNSZ.
1578 addi r3,r3,-$BNSZ
1579 addi r5,r5,-$BNSZ
1580 mtctr r6
1581Lppcasm_add_mainloop:
1582 $LDU r7,$BNSZ(r4)
1583 $LDU r8,$BNSZ(r5)
1584 adde r8,r7,r8 # r8 = a[i] + b[i] + carry from the previous word
1585 $STU r8,$BNSZ(r3)
1586 bdnz- Lppcasm_add_mainloop
1587Lppcasm_add_adios:
1588 addze r3,r0 #return carry bit (r0 is 0, so r3 = carry).
1589 blr
1590
1591#
1592# NOTE: The following label name should be changed to
1593# "bn_div_words" i.e. remove the first dot
1594# for the gcc compiler. This should be automatically
1595# done in the build
1596#
1597
1598.align 4
1599.bn_div_words:
1600#
1601# This is a cleaned up version of code generated by
1602# the AIX compiler. The only optimization is to use
1603# the PPC instruction to count leading zeros instead
1604# of call to num_bits_word. Since this was compiled
1605# only at level -O2 we can possibly squeeze it more?
1606# Returns in r3 the two half-word digits q combined as (first<<BN_BITS4)|second, or -1 if d == 0.
1607# r3 = h
1608# r4 = l
1609# r5 = d
1610
1611 $UCMPI 0,r5,0 # compare r5 and 0
1612 bne Lppcasm_div1 # proceed if d!=0
1613 li r3,-1 # d=0 return -1
1614 blr
1615Lppcasm_div1:
1616 xor r0,r0,r0 #r0=0
1617 li r8,$BITS
1618 $CNTLZ. r7,r5 #r7 = num leading 0s in d.
1619 beq Lppcasm_div2 #proceed if no leading zeros
1620 subf r8,r7,r8 #r8 = BN_num_bits_word(d)
1621 $SHR. r9,r3,r8 #are there any bits above r8'th?
1622 $TR 16,r9,r0 #if there're, signal to dump core...
1623Lppcasm_div2:
1624 $UCMP 0,r3,r5 #h>=d?
1625 blt Lppcasm_div3 #goto Lppcasm_div3 if not
1626 subf r3,r5,r3 #h-=d ;
1627Lppcasm_div3: #r7 (leading zeros of d) is the shift count i = BN_BITS2 - BN_num_bits_word(d)
1628 cmpi 0,0,r7,0 # is (i == 0)?
1629 beq Lppcasm_div4
1630 $SHL r3,r3,r7 # h = (h<< i)
1631 $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i)
1632 $SHL r5,r5,r7 # d<<=i
1633 or r3,r3,r8 # h = (h<<i)|(l>>(BN_BITS2-i))
1634 $SHL r4,r4,r7 # l <<=i
1635Lppcasm_div4:
1636 $SHRI r9,r5,`$BITS/2` # r9 = dh
1637 # dl will be computed when needed
1638 # as it saves registers.
1639 li r6,2 #r6=2
1640 mtctr r6 #counter will be in count.
1641Lppcasm_divouterloop:
1642 $SHRI r8,r3,`$BITS/2` #r8 = (h>>BN_BITS4)
1643 $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4
1644 # compute here for innerloop.
1645 $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh
1646 bne Lppcasm_div5 # goto Lppcasm_div5 if not
1647
1648 li r8,-1
1649 $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l
1650 b Lppcasm_div6
1651Lppcasm_div5:
1652 $UDIV r8,r3,r9 #q = h/dh
1653Lppcasm_div6:
1654 $UMULL r12,r9,r8 #th = q*dh
1655 $CLRU r10,r5,`$BITS/2` #r10=dl
1656 $UMULL r6,r8,r10 #tl = q*dl
1657
1658Lppcasm_divinnerloop:
1659 subf r10,r12,r3 #t = h -th
1660 $SHRI r7,r10,`$BITS/2` #r7 = t>>BN_BITS4, i.e. the upper half of t
1661 addic. r7,r7,0 #test if r7 == 0. used below.
1662 # now want to compute
1663 # r7 = (t<<BN_BITS4)|((l&BN_MASK2h)>>BN_BITS4)
1664 # the following 2 instructions do that
1665 $SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4)
1666 or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4)
1667 $UCMP cr1,r6,r7 # compare (tl <= r7)
1668 bne Lppcasm_divinnerexit # exit if the upper half of t was nonzero (result of the addic. above)
1669 ble cr1,Lppcasm_divinnerexit # exit if tl <= r7
1670 addi r8,r8,-1 #q--
1671 subf r12,r9,r12 #th -=dh
1672 $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop.
1673 subf r6,r10,r6 #tl -=dl
1674 b Lppcasm_divinnerloop
1675Lppcasm_divinnerexit:
1676 $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4)
1677 $SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h;
1678 $UCMP cr1,r4,r11 # compare l and tl
1679 add r12,r12,r10 # th+=t
1680 bge cr1,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
1681 addi r12,r12,1 # th++
1682Lppcasm_div7:
1683 subf r11,r11,r4 #r11=l-tl
1684 $UCMP cr1,r3,r12 #compare h and th
1685 bge cr1,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
1686 addi r8,r8,-1 # q--
1687 add r3,r5,r3 # h+=d
1688Lppcasm_div8:
1689 subf r12,r12,r3 #r12 = h-th
1690 $SHLI r4,r11,`$BITS/2` #l=(l&BN_MASK2l)<<BN_BITS4
1691 # want to compute
1692 # h = ((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2
1693 # the following 2 instructions will do this.
1694 $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2.
1695 $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3
1696 bdz Lppcasm_div9 #if (count==0) break ;
1697 $SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4
1698 b Lppcasm_divouterloop
1699Lppcasm_div9:
1700 or r3,r8,r0 # return ret | q
1701 blr
1702
1703#
1704# NOTE: The following label name should be changed to
1705# "bn_sqr_words" i.e. remove the first dot
1706# for the gcc compiler. This should be automatically
1707# done in the build
1708#
1709.align 4
1710.bn_sqr_words:
1711#
1712# Optimized version of bn_sqr_words
1713#
1714# void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
1715#
1716# r3 = r
1717# r4 = a
1718# r5 = n
1719#
1720# r6 = a[i].
1721# r7,r8 = product.
1722# Each a[i] yields two consecutive words of r: the $UMULL result first, then the $UMULH result.
1723# No unrolling done here. Not performance critical.
1724
1725 addic. r5,r5,0 #test r5; nothing is stored when n is 0.
1726 beq Lppcasm_sqr_adios
1727 addi r4,r4,-$BNSZ # start each pointer one word early; the $LDU/$STU accesses in the loop step it by $BNSZ.
1728 addi r3,r3,-$BNSZ
1729 mtctr r5
1730Lppcasm_sqr_mainloop:
1731 #sqr(r[0],r[1],a[0]);
1732 $LDU r6,$BNSZ(r4)
1733 $UMULL r7,r6,r6
1734 $UMULH r8,r6,r6
1735 $STU r7,$BNSZ(r3)
1736 $STU r8,$BNSZ(r3)
1737 bdnz- Lppcasm_sqr_mainloop
1738Lppcasm_sqr_adios:
1739 blr
1740
1741#
1742# NOTE: The following label name should be changed to
1743# "bn_mul_words" i.e. remove the first dot
1744# for the gcc compiler. This should be automatically
1745# done in the build
1746#
1747
1748.align 4
1749.bn_mul_words:
1750#
1751# BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
1752# Stores the low word of ap[i]*w + carry into rp[i]; the high word becomes the next carry. Returns the final carry word in r3.
1753# r3 = rp
1754# r4 = ap
1755# r5 = num
1756# r6 = w
1757 xor r0,r0,r0
1758 xor r12,r12,r12 # used for carry
1759 rlwinm. r7,r5,30,2,31 # num >> 2
1760 beq Lppcasm_mw_REM
1761 mtctr r7
1762Lppcasm_mw_LOOP:
1763 #mul(rp[0],ap[0],w,c1);
1764 $LD r8,`0*$BNSZ`(r4)
1765 $UMULL r9,r6,r8
1766 $UMULH r10,r6,r8
1767 addc r9,r9,r12
1768 #addze r10,r10 #carry is NOT ignored.
1769 #will be taken care of
1770 #in second spin below
1771 #using adde.
1772 $ST r9,`0*$BNSZ`(r3)
1773 #mul(rp[1],ap[1],w,c1);
1774 $LD r8,`1*$BNSZ`(r4)
1775 $UMULL r11,r6,r8
1776 $UMULH r12,r6,r8
1777 adde r11,r11,r10
1778 #addze r12,r12
1779 $ST r11,`1*$BNSZ`(r3)
1780 #mul(rp[2],ap[2],w,c1);
1781 $LD r8,`2*$BNSZ`(r4)
1782 $UMULL r9,r6,r8
1783 $UMULH r10,r6,r8
1784 adde r9,r9,r12
1785 #addze r10,r10
1786 $ST r9,`2*$BNSZ`(r3)
1787 #mul(rp[3],ap[3],w,c1);
1788 $LD r8,`3*$BNSZ`(r4)
1789 $UMULL r11,r6,r8
1790 $UMULH r12,r6,r8
1791 adde r11,r11,r10
1792 addze r12,r12 #this spin we collect carry into
1793 #r12
1794 $ST r11,`3*$BNSZ`(r3)
1795
1796 addi r3,r3,`4*$BNSZ`
1797 addi r4,r4,`4*$BNSZ`
1798 bdnz- Lppcasm_mw_LOOP
1799
1800Lppcasm_mw_REM:
1801 andi. r5,r5,0x3 # r5 = num & 3: words left over after the 4-way unrolled loop
1802 beq Lppcasm_mw_OVER
1803 #mul(rp[0],ap[0],w,c1);
1804 $LD r8,`0*$BNSZ`(r4)
1805 $UMULL r9,r6,r8
1806 $UMULH r10,r6,r8
1807 addc r9,r9,r12
1808 addze r10,r10
1809 $ST r9,`0*$BNSZ`(r3)
1810 addi r12,r10,0 # carry word for the next step (or the return value)
1811
1812 addi r5,r5,-1
1813 cmpli 0,0,r5,0
1814 beq Lppcasm_mw_OVER
1815
1816
1817 #mul(rp[1],ap[1],w,c1);
1818 $LD r8,`1*$BNSZ`(r4)
1819 $UMULL r9,r6,r8
1820 $UMULH r10,r6,r8
1821 addc r9,r9,r12
1822 addze r10,r10
1823 $ST r9,`1*$BNSZ`(r3)
1824 addi r12,r10,0
1825
1826 addi r5,r5,-1
1827 cmpli 0,0,r5,0
1828 beq Lppcasm_mw_OVER
1829
1830 #mul(rp[2],ap[2],w,c1);
1831 $LD r8,`2*$BNSZ`(r4)
1832 $UMULL r9,r6,r8
1833 $UMULH r10,r6,r8
1834 addc r9,r9,r12
1835 addze r10,r10
1836 $ST r9,`2*$BNSZ`(r3)
1837 addi r12,r10,0
1838
1839Lppcasm_mw_OVER:
1840 addi r3,r12,0 # return the carry word
1841 blr
1842
1843#
1844# NOTE: The following label name should be changed to
1845# "bn_mul_add_words" i.e. remove the first dot
1846# for the gcc compiler. This should be automatically
1847# done in the build
1848#
1849
1850.align 4
1851.bn_mul_add_words:
1852#
1853# BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
1854#
1855# r3 = rp
1856# r4 = ap
1857# r5 = num
1858# r6 = w
1859# Adds ap[i]*w plus the running carry word into rp[i]; returns the final carry word in r3.
1860# empirical evidence suggests that unrolled version performs best!!
1861#
1862 xor r0,r0,r0 #r0 = 0
1863 xor r12,r12,r12 #r12 = 0 . used for carry
1864 rlwinm. r7,r5,30,2,31 # num >> 2
1865 beq Lppcasm_maw_leftover # if (num < 4) go Lppcasm_maw_leftover
1866 mtctr r7
1867Lppcasm_maw_mainloop:
1868 #mul_add(rp[0],ap[0],w,c1);
1869 $LD r8,`0*$BNSZ`(r4)
1870 $LD r11,`0*$BNSZ`(r3)
1871 $UMULL r9,r6,r8
1872 $UMULH r10,r6,r8
1873 addc r9,r9,r12 #r12 is carry.
1874 addze r10,r10
1875 addc r9,r9,r11
1876 #addze r10,r10
1877 #the above instruction addze
1878 #is NOT needed. Carry will NOT
1879 #be ignored. It's not affected
1880 #by multiply and will be collected
1881 #in the next spin
1882 $ST r9,`0*$BNSZ`(r3)
1883
1884 #mul_add(rp[1],ap[1],w,c1);
1885 $LD r8,`1*$BNSZ`(r4)
1886 $LD r9,`1*$BNSZ`(r3)
1887 $UMULL r11,r6,r8
1888 $UMULH r12,r6,r8
1889 adde r11,r11,r10 #r10 is carry.
1890 addze r12,r12
1891 addc r11,r11,r9
1892 #addze r12,r12
1893 $ST r11,`1*$BNSZ`(r3)
1894
1895 #mul_add(rp[2],ap[2],w,c1);
1896 $LD r8,`2*$BNSZ`(r4)
1897 $UMULL r9,r6,r8
1898 $LD r11,`2*$BNSZ`(r3)
1899 $UMULH r10,r6,r8
1900 adde r9,r9,r12
1901 addze r10,r10
1902 addc r9,r9,r11
1903 #addze r10,r10
1904 $ST r9,`2*$BNSZ`(r3)
1905
1906 #mul_add(rp[3],ap[3],w,c1);
1907 $LD r8,`3*$BNSZ`(r4)
1908 $UMULL r11,r6,r8
1909 $LD r9,`3*$BNSZ`(r3)
1910 $UMULH r12,r6,r8
1911 adde r11,r11,r10
1912 addze r12,r12
1913 addc r11,r11,r9
1914 addze r12,r12 # last word of the spin: fold the pending carry into r12 here
1915 $ST r11,`3*$BNSZ`(r3)
1916 addi r3,r3,`4*$BNSZ`
1917 addi r4,r4,`4*$BNSZ`
1918 bdnz- Lppcasm_maw_mainloop
1919
1920Lppcasm_maw_leftover:
1921 andi. r5,r5,0x3 # r5 = num & 3: words left over after the 4-way unrolled loop
1922 beq Lppcasm_maw_adios
1923 addi r3,r3,-$BNSZ # start one word early; the $LDU accesses below step the pointers by $BNSZ.
1924 addi r4,r4,-$BNSZ
1925 #mul_add(rp[0],ap[0],w,c1);
1926 mtctr r5
1927 $LDU r8,$BNSZ(r4)
1928 $UMULL r9,r6,r8
1929 $UMULH r10,r6,r8
1930 $LDU r11,$BNSZ(r3)
1931 addc r9,r9,r11
1932 addze r10,r10
1933 addc r9,r9,r12
1934 addze r12,r10 # new carry word = high word plus both carries
1935 $ST r9,0(r3)
1936
1937 bdz Lppcasm_maw_adios
1938 #mul_add(rp[1],ap[1],w,c1);
1939 $LDU r8,$BNSZ(r4)
1940 $UMULL r9,r6,r8
1941 $UMULH r10,r6,r8
1942 $LDU r11,$BNSZ(r3)
1943 addc r9,r9,r11
1944 addze r10,r10
1945 addc r9,r9,r12
1946 addze r12,r10
1947 $ST r9,0(r3)
1948
1949 bdz Lppcasm_maw_adios
1950 #mul_add(rp[2],ap[2],w,c1);
1951 $LDU r8,$BNSZ(r4)
1952 $UMULL r9,r6,r8
1953 $UMULH r10,r6,r8
1954 $LDU r11,$BNSZ(r3)
1955 addc r9,r9,r11
1956 addze r10,r10
1957 addc r9,r9,r12
1958 addze r12,r10
1959 $ST r9,0(r3)
1960
1961Lppcasm_maw_adios:
1962 addi r3,r12,0 # return the carry word
1963 blr
1964 .align 4
1965EOF
1966$data =~ s/\`([^\`]*)\`/eval $1/gem; # replace every `...` span in the assembly text with the value of the Perl expression inside it
1967print $data;
1968close STDOUT or die "error closing STDOUT: $!"; # a failed or short write must not leave a silently truncated output file
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl
deleted file mode 100755
index 6524651748..0000000000
--- a/src/lib/libcrypto/bn/asm/x86-mont.pl
+++ /dev/null
@@ -1,592 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# October 2005
11#
12# This is "teaser" code, as it can be improved in several ways...
13# First of all, a non-SSE2 path should be implemented (yes, for now it
14# performs Montgomery multiplication/convolution only on SSE2-capable
15# CPUs such as P4; others fall back to the original code). Then the inner
16# loop can be unrolled and modulo-scheduled to improve ILP and possibly
17# moved to the 128-bit XMM register bank (though that would require input
18# rearrangement and/or increase bus bandwidth utilization). A dedicated
19# squaring procedure should give further performance improvement...
20# Yet, for a draft, the code improves the rsa512 *sign* benchmark by
21# 110%(!), the rsa1024 one by 70% and the rsa4096 one by 20%:-)
22
23# December 2006
24#
25# Modulo-scheduling SSE2 loops results in further 15-20% improvement.
26# Integer-only code [being equipped with dedicated squaring procedure]
27# gives ~40% on rsa512 sign benchmark...
28
# Work out the directory this script lives in and add it, plus
# ../../perlasm relative to it, to @INC so that x86asm.pl can be loaded.
29$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
30push(@INC,"${dir}","${dir}../../perlasm");
31require "x86asm.pl";
32
33&asm_init($ARGV[0],$0);
34
# The SSE2 code path is generated only when -DOPENSSL_IA32_SSE2 appears
# among the command-line arguments.
35$sse2=0;
36for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
37
38&external_label("OPENSSL_ia32cap_P") if ($sse2);
39
# bn_mul_mont prologue: rejects num<4 (returning 0 in eax), carves a
# temporary vector plus a small frame out of the stack, and copies the
# arguments into that frame. function_begin is an x86asm.pl helper not
# shown here; presumably it emits the entry label and register saves.
40&function_begin("bn_mul_mont");
41
# Register aliases. $ap/$tp and $rp/$bp deliberately share a register.
42$i="edx";
43$j="ecx";
44$ap="esi"; $tp="esi"; # overlapping variables!!!
45$rp="edi"; $bp="edi"; # overlapping variables!!!
46$np="ebp";
47$num="ebx";
48
# Eight dword slots at the bottom of the new stack area; the temporary
# vector tp[] starts right above them, at offset $frame from esp.
49$_num=&DWP(4*0,"esp"); # stack top layout
50$_rp=&DWP(4*1,"esp");
51$_ap=&DWP(4*2,"esp");
52$_bp=&DWP(4*3,"esp");
53$_np=&DWP(4*4,"esp");
54$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp");
55$_sp=&DWP(4*6,"esp");
56$_bpend=&DWP(4*7,"esp");
57$frame=32; # size of above frame rounded up to 16n
58
# eax=0 is the return value if we bail out below because num<4.
59 &xor ("eax","eax");
60 &mov ("edi",&wparam(5)); # int num
61 &cmp ("edi",4);
62 &jl (&label("just_leave"));
63
# NOTE(review): the lea below yields the address of the ap argument
# slot rather than the ap pointer value itself, so the 2K-window
# arithmetic further down is relative to that address - confirm intent.
64 &lea ("esi",&wparam(0)); # put aside pointer to argument block
65 &lea ("edx",&wparam(1)); # load ap
66 &mov ("ebp","esp"); # saved stack pointer!
67 &add ("edi",2); # extra two words on top of tp
68 &neg ("edi");
69 &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
70 &neg ("edi"); # edi is num+2 again
71
72 # minimize cache contention by arranging 2K window between stack
73 # pointer and ap argument [np is also position sensitive vector,
74 # but it's assumed to be near ap, as it's allocated at ~same
75 # time].
76 &mov ("eax","esp");
77 &sub ("eax","edx");
78 &and ("eax",2047);
79 &sub ("esp","eax"); # this aligns sp and ap modulo 2048
80
81 &xor ("edx","esp");
82 &and ("edx",2048);
83 &xor ("edx",2048);
84 &sub ("esp","edx"); # this splits them apart modulo 4096
85
86 &and ("esp",-64); # align to cache line
87
88 ################################# load argument block...
89 &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
90 &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
91 &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
92 &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
93 &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
94 #&mov ("edi",&DWP(5*4,"esi"));# int num
95
96 &mov ("esi",&DWP(0,"esi")); # pull n0[0]
97 &mov ($_rp,"eax"); # ... save a copy of argument block
98 &mov ($_ap,"ebx");
99 &mov ($_bp,"ecx");
100 &mov ($_np,"edx");
101 &mov ($_n0,"esi");
# edi still holds num+2 here, so edi-3 is num-1.
102 &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
103 #&mov ($_num,$num); # redundant as $num is not reused
104 &mov ($_sp,"ebp"); # saved stack pointer!
105
# SSE2/MMX code path, generated only when $sse2 is set. The emitted code
# tests the SSE2 capability bit at run time and jumps to the non_sse2
# label when it is clear; otherwise it runs the loops below and finishes
# by jumping to common_tail. Throughout this path $num holds num-1.
106if($sse2) {
107$acc0="mm0"; # mmx register bank layout
108$acc1="mm1";
109$car0="mm2";
110$car1="mm3";
111$mul0="mm4";
112$mul1="mm5";
113$temp="mm6";
114$mask="mm7";
115
# picsetup/picsymbol are x86asm.pl helpers not shown here; presumably
# they leave the address of OPENSSL_ia32cap_P in eax.
116 &picsetup("eax");
117 &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
118 &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
119 &jnc (&label("non_sse2"));
120
121 &mov ("eax",-1);
122 &movd ($mask,"eax"); # mask 32 lower bits
123
124 &mov ($ap,$_ap); # load input pointers
125 &mov ($bp,$_bp);
126 &mov ($np,$_np);
127
128 &xor ($i,$i); # i=0
129 &xor ($j,$j); # j=0
130
131 &movd ($mul0,&DWP(0,$bp)); # bp[0]
132 &movd ($mul1,&DWP(0,$ap)); # ap[0]
133 &movd ($car1,&DWP(0,$np)); # np[0]
134
135 &pmuludq($mul1,$mul0); # ap[0]*bp[0]
136 &movq ($car0,$mul1);
137 &movq ($acc0,$mul1); # I wish movd worked for
138 &pand ($acc0,$mask); # inter-register transfers
139
140 &pmuludq($mul1,$_n0q); # *=n0
141
142 &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
143 &paddq ($car1,$acc0);
144
145 &movd ($acc1,&DWP(4,$np)); # np[1]
146 &movd ($acc0,&DWP(4,$ap)); # ap[1]
147
148 &psrlq ($car0,32);
149 &psrlq ($car1,32);
150
151 &inc ($j); # j++
# First pass (i=0): there is no previous tp[] contents to add in.
152&set_label("1st",16);
153 &pmuludq($acc0,$mul0); # ap[j]*bp[0]
154 &pmuludq($acc1,$mul1); # np[j]*m1
155 &paddq ($car0,$acc0); # +=c0
156 &paddq ($car1,$acc1); # +=c1
157
158 &movq ($acc0,$car0);
159 &pand ($acc0,$mask);
160 &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
161 &paddq ($car1,$acc0); # +=ap[j]*bp[0];
162 &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
163 &psrlq ($car0,32);
164 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]=
165 &psrlq ($car1,32);
166
167 &lea ($j,&DWP(1,$j));
168 &cmp ($j,$num);
169 &jl (&label("1st"));
170
171 &pmuludq($acc0,$mul0); # ap[num-1]*bp[0]
172 &pmuludq($acc1,$mul1); # np[num-1]*m1
173 &paddq ($car0,$acc0); # +=c0
174 &paddq ($car1,$acc1); # +=c1
175
176 &movq ($acc0,$car0);
177 &pand ($acc0,$mask);
178 &paddq ($car1,$acc0); # +=ap[num-1]*bp[0];
179 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
180
181 &psrlq ($car0,32);
182 &psrlq ($car1,32);
183
184 &paddq ($car1,$car0);
185 &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
186
187 &inc ($i); # i++
# Remaining passes: same as above, but the previous tp[] is added in.
188&set_label("outer");
189 &xor ($j,$j); # j=0
190
191 &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i]
192 &movd ($mul1,&DWP(0,$ap)); # ap[0]
193 &movd ($temp,&DWP($frame,"esp")); # tp[0]
194 &movd ($car1,&DWP(0,$np)); # np[0]
195 &pmuludq($mul1,$mul0); # ap[0]*bp[i]
196
197 &paddq ($mul1,$temp); # +=tp[0]
198 &movq ($acc0,$mul1);
199 &movq ($car0,$mul1);
200 &pand ($acc0,$mask);
201
202 &pmuludq($mul1,$_n0q); # *=n0
203
204 &pmuludq($car1,$mul1);
205 &paddq ($car1,$acc0);
206
207 &movd ($temp,&DWP($frame+4,"esp")); # tp[1]
208 &movd ($acc1,&DWP(4,$np)); # np[1]
209 &movd ($acc0,&DWP(4,$ap)); # ap[1]
210
211 &psrlq ($car0,32);
212 &psrlq ($car1,32);
213 &paddq ($car0,$temp); # +=tp[1]
214
215 &inc ($j); # j++
# $num doubles as the inner-loop down-counter: the loop ends when it
# reaches zero and it is reloaded from $j right after the loop.
216 &dec ($num);
217&set_label("inner");
218 &pmuludq($acc0,$mul0); # ap[j]*bp[i]
219 &pmuludq($acc1,$mul1); # np[j]*m1
220 &paddq ($car0,$acc0); # +=c0
221 &paddq ($car1,$acc1); # +=c1
222
223 &movq ($acc0,$car0);
224 &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
225 &pand ($acc0,$mask);
226 &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
227 &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j]
228 &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
229 &psrlq ($car0,32);
230 &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
231 &psrlq ($car1,32);
232 &paddq ($car0,$temp); # +=tp[j+1]
233
234 &dec ($num);
235 &lea ($j,&DWP(1,$j)); # j++
236 &jnz (&label("inner"));
237
238 &mov ($num,$j); # restore $num from $j
239 &pmuludq($acc0,$mul0); # ap[num-1]*bp[i]
240 &pmuludq($acc1,$mul1); # np[num-1]*m1
241 &paddq ($car0,$acc0); # +=c0
242 &paddq ($car1,$acc1); # +=c1
243
244 &movq ($acc0,$car0);
245 &pand ($acc0,$mask);
246 &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1]
247 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
248 &psrlq ($car0,32);
249 &psrlq ($car1,32);
250
251 &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
252 &paddq ($car1,$car0);
253 &paddq ($car1,$temp);
254 &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
255
256 &lea ($i,&DWP(1,$i)); # i++
257 &cmp ($i,$num);
258 &jle (&label("outer"));
259
260 &emms (); # done with mmx bank
261 &jmp (&label("common_tail"));
262
263&set_label("non_sse2",16);
264}
265
# Integer-only code path. The if (0) branch is never generated and is
# kept only for its explanatory comment; the else branch emits the
# actual code. On entry $num holds num-1 and the argument copies live in
# the stack frame slots ($_ap, $_bp, $_np, $_n0, ...).
266if (0) {
267 &mov ("esp",$_sp);
268 &xor ("eax","eax"); # signal "not fast enough [yet]"
269 &jmp (&label("just_leave"));
270 # While the below code provides competitive performance for
271 # all key lengths on modern Intel cores, it's still more
272 # than 10% slower for 4096-bit key elsewhere:-( "Competitive"
273 # means compared to the original integer-only assembler.
274 # 512-bit RSA sign is better by ~40%, but that's about all
275 # one can say about all CPUs...
276} else {
277$inp="esi"; # integer path uses these registers differently
278$word="edi";
279$carry="ebp";
280
# Dispatch: the jz below is taken only when both (num&1) and ap-bp are
# zero, i.e. the dedicated squaring code is used when ap==bp and num is
# even; everything else falls through to the generic multiply loops.
281 &mov ($inp,$_ap);
282 &lea ($carry,&DWP(1,$num));
283 &mov ($word,$_bp);
284 &xor ($j,$j); # j=0
285 &mov ("edx",$inp);
286 &and ($carry,1); # see if num is even
287 &sub ("edx",$word); # see if ap==bp
288 &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num]
289 &or ($carry,"edx");
290 &mov ($word,&DWP(0,$word)); # bp[0]
291 &jz (&label("bn_sqr_mont"));
292 &mov ($_bpend,"eax");
293 &mov ("eax",&DWP(0,$inp));
294 &xor ("edx","edx");
295
# First pass: tp[] = ap[]*bp[0].
296&set_label("mull",16);
297 &mov ($carry,"edx");
298 &mul ($word); # ap[j]*bp[0]
299 &add ($carry,"eax");
300 &lea ($j,&DWP(1,$j));
301 &adc ("edx",0);
302 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
303 &cmp ($j,$num);
304 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
305 &jl (&label("mull"));
306
307 &mov ($carry,"edx");
308 &mul ($word); # ap[num-1]*bp[0]
309 &mov ($word,$_n0);
310 &add ("eax",$carry);
311 &mov ($inp,$_np);
312 &adc ("edx",0);
313 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
314
315 &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]=
316 &xor ($j,$j);
317 &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
318 &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
319
320 &mov ("eax",&DWP(0,$inp)); # np[0]
321 &mul ($word); # np[0]*m
322 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
323 &mov ("eax",&DWP(4,$inp)); # np[1]
324 &adc ("edx",0);
325 &inc ($j);
326
327 &jmp (&label("2ndmadd"));
328
# Later passes: tp[] += ap[]*bp[i].
329&set_label("1stmadd",16);
330 &mov ($carry,"edx");
331 &mul ($word); # ap[j]*bp[i]
332 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
333 &lea ($j,&DWP(1,$j));
334 &adc ("edx",0);
335 &add ($carry,"eax");
336 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
337 &adc ("edx",0);
338 &cmp ($j,$num);
339 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
340 &jl (&label("1stmadd"));
341
342 &mov ($carry,"edx");
343 &mul ($word); # ap[num-1]*bp[i]
344 &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1]
345 &mov ($word,$_n0);
346 &adc ("edx",0);
347 &mov ($inp,$_np);
348 &add ($carry,"eax");
349 &adc ("edx",0);
350 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
351
352 &xor ($j,$j);
353 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
354 &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]=
355 &adc ($j,0);
356 &mov ("eax",&DWP(0,$inp)); # np[0]
357 &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
358 &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
359
360 &mul ($word); # np[0]*m
361 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
362 &mov ("eax",&DWP(4,$inp)); # np[1]
363 &adc ("edx",0);
364 &mov ($j,1);
365
# Reduction step: adds np[]*m to tp[] and stores each result word one
# position lower in tp[].
366&set_label("2ndmadd",16);
367 &mov ($carry,"edx");
368 &mul ($word); # np[j]*m
369 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
370 &lea ($j,&DWP(1,$j));
371 &adc ("edx",0);
372 &add ($carry,"eax");
373 &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1]
374 &adc ("edx",0);
375 &cmp ($j,$num);
376 &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]=
377 &jl (&label("2ndmadd"));
378
379 &mov ($carry,"edx");
380 &mul ($word); # np[j]*m
381 &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
382 &adc ("edx",0);
383 &add ($carry,"eax");
384 &adc ("edx",0);
385 &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
386
# Advance the saved bp pointer by one word; finished once it equals the
# end address stored in $_bpend.
387 &xor ("eax","eax");
388 &mov ($j,$_bp); # &bp[i]
389 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
390 &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
391 &lea ($j,&DWP(4,$j));
392 &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
393 &cmp ($j,$_bpend);
394 &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
395 &je (&label("common_tail"));
396
397 &mov ($word,&DWP(0,$j)); # bp[i+1]
398 &mov ($inp,$_ap);
399 &mov ($_bp,$j); # &bp[++i]
400 &xor ($j,$j);
401 &xor ("edx","edx");
402 &mov ("eax",&DWP(0,$inp));
403 &jmp (&label("1stmadd"));
404
# Dedicated squaring code (ap==bp, num even). $sbit aliases $num, so the
# count is parked in $_num, and $_bp is reused to hold the index i.
405&set_label("bn_sqr_mont",16);
406$sbit=$num;
407 &mov ($_num,$num);
408 &mov ($_bp,$j); # i=0
409
410 &mov ("eax",$word); # ap[0]
411 &mul ($word); # ap[0]*ap[0]
412 &mov (&DWP($frame,"esp"),"eax"); # tp[0]=
413 &mov ($sbit,"edx");
414 &shr ("edx",1);
415 &and ($sbit,1);
416 &inc ($j);
417&set_label("sqr",16);
418 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
419 &mov ($carry,"edx");
420 &mul ($word); # ap[j]*ap[0]
421 &add ("eax",$carry);
422 &lea ($j,&DWP(1,$j));
423 &adc ("edx",0);
424 &lea ($carry,&DWP(0,$sbit,"eax",2));
425 &shr ("eax",31);
426 &cmp ($j,$_num);
427 &mov ($sbit,"eax");
428 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
429 &jl (&label("sqr"));
430
431 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1]
432 &mov ($carry,"edx");
433 &mul ($word); # ap[num-1]*ap[0]
434 &add ("eax",$carry);
435 &mov ($word,$_n0);
436 &adc ("edx",0);
437 &mov ($inp,$_np);
438 &lea ($carry,&DWP(0,$sbit,"eax",2));
439 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
440 &shr ("eax",31);
441 &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]=
442
443 &lea ($carry,&DWP(0,"eax","edx",2));
444 &mov ("eax",&DWP(0,$inp)); # np[0]
445 &shr ("edx",31);
446 &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]=
447 &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]=
448
449 &mul ($word); # np[0]*m
450 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
451 &mov ($num,$j);
452 &adc ("edx",0);
453 &mov ("eax",&DWP(4,$inp)); # np[1]
454 &mov ($j,1);
455
# Reduction step for the squaring path; handles two words of np[] per
# iteration.
456&set_label("3rdmadd",16);
457 &mov ($carry,"edx");
458 &mul ($word); # np[j]*m
459 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
460 &adc ("edx",0);
461 &add ($carry,"eax");
462 &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1]
463 &adc ("edx",0);
464 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]=
465
466 &mov ($carry,"edx");
467 &mul ($word); # np[j+1]*m
468 &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1]
469 &lea ($j,&DWP(2,$j));
470 &adc ("edx",0);
471 &add ($carry,"eax");
472 &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2]
473 &adc ("edx",0);
474 &cmp ($j,$num);
475 &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]=
476 &jl (&label("3rdmadd"));
477
478 &mov ($carry,"edx");
479 &mul ($word); # np[j]*m
480 &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
481 &adc ("edx",0);
482 &add ($carry,"eax");
483 &adc ("edx",0);
484 &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
485
486 &mov ($j,$_bp); # i
487 &xor ("eax","eax");
488 &mov ($inp,$_ap);
489 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
490 &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
491 &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
492 &cmp ($j,$num);
493 &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
494 &je (&label("common_tail"));
495
496 &mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
497 &lea ($j,&DWP(1,$j));
498 &mov ("eax",$word);
499 &mov ($_bp,$j); # ++i
500 &mul ($word); # ap[i]*ap[i]
501 &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i]
502 &adc ("edx",0);
503 &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]=
504 &xor ($carry,$carry);
505 &cmp ($j,$num);
506 &lea ($j,&DWP(1,$j));
507 &je (&label("sqrlast"));
508
509 &mov ($sbit,"edx"); # zaps $num
510 &shr ("edx",1);
511 &and ($sbit,1);
512&set_label("sqradd",16);
513 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
514 &mov ($carry,"edx");
515 &mul ($word); # ap[j]*ap[i]
516 &add ("eax",$carry);
517 &lea ($carry,&DWP(0,"eax","eax"));
518 &adc ("edx",0);
519 &shr ("eax",31);
520 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
521 &lea ($j,&DWP(1,$j));
522 &adc ("eax",0);
523 &add ($carry,$sbit);
524 &adc ("eax",0);
525 &cmp ($j,$_num);
526 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
527 &mov ($sbit,"eax");
528 &jle (&label("sqradd"));
529
530 &mov ($carry,"edx");
531 &add ("edx","edx");
532 &shr ($carry,31);
533 &add ("edx",$sbit);
534 &adc ($carry,0);
535&set_label("sqrlast");
536 &mov ($word,$_n0);
537 &mov ($inp,$_np);
538 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
539
540 &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num]
541 &mov ("eax",&DWP(0,$inp)); # np[0]
542 &adc ($carry,0);
543 &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]=
544 &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]=
545
546 &mul ($word); # np[0]*m
547 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
548 &lea ($num,&DWP(-1,$j)); # rebuild $num, which $sbit clobbered
549 &adc ("edx",0);
550 &mov ($j,1);
551 &mov ("eax",&DWP(4,$inp)); # np[1]
552
553 &jmp (&label("3rdmadd"));
554}
555
# Common tail shared by both code paths. It computes rp[] = tp[] - np[],
# then uses the final borrow to choose, without branching, whether tp[]
# or the already-stored rp[] is copied into rp[]. While copying it also
# overwrites the temporary vector, then restores the caller's stack
# pointer and returns 1 in eax. On entry $num holds num-1.
556&set_label("common_tail",16);
557 &mov ($np,$_np); # load modulus pointer
558 &mov ($rp,$_rp); # load result pointer
559 &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped]
560
561 &mov ("eax",&DWP(0,$tp)); # tp[0]
562 &mov ($j,$num); # j=num-1
563 &xor ($i,$i); # i=0 and clear CF!
564
# The borrow is carried from one iteration to the next in CF; the loop
# itself is controlled by the sign of $j after dec, which leaves CF
# alone (mov and lea do not modify flags either).
565&set_label("sub",16);
566 &sbb ("eax",&DWP(0,$np,$i,4));
567 &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i]
568 &dec ($j); # doesn't affect CF!
569 &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1]
570 &lea ($i,&DWP(1,$i)); # i++
571 &jge (&label("sub"));
572
# eax becomes an all-ones or all-zero mask, used to pick the copy
# source address without a branch.
573 &sbb ("eax",0); # handle upmost overflow bit
574 &and ($tp,"eax");
575 &not ("eax");
576 &mov ($np,$rp);
577 &and ($np,"eax");
578 &or ($tp,$np); # tp=carry?tp:rp
579
# Copies from index num-1 down to 0. The value stored over the temporary
# vector is $j, which the sub loop above left at -1 (not zero).
580&set_label("copy",16); # copy or in-place refresh
581 &mov ("eax",&DWP(0,$tp,$num,4));
582 &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i]
583 &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector
584 &dec ($num);
585 &jge (&label("copy"));
586
587 &mov ("esp",$_sp); # pull saved stack pointer
588 &mov ("eax",1); # return 1
# Early exits jump here with eax already set to 0.
589&set_label("just_leave");
590&function_end("bn_mul_mont");
591
592&asm_finish();
diff --git a/src/lib/libcrypto/bn/asm/x86_64-mont.pl b/src/lib/libcrypto/bn/asm/x86_64-mont.pl
deleted file mode 100755
index 30cfab4fce..0000000000
--- a/src/lib/libcrypto/bn/asm/x86_64-mont.pl
+++ /dev/null
@@ -1,1503 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# October 2005.
11#
12# Montgomery multiplication routine for x86_64. While it gives a modest
13# 9% improvement of rsa4096 sign on Opteron, rsa512 sign runs more
14# than twice as fast (>2x). The most common rsa1024 sign is improved by
15# a respectable 50%. It remains to be seen if loop unrolling and a
16# dedicated squaring routine can provide further improvement...
17
18# July 2011.
19#
20# Add dedicated squaring procedure. Performance improvement varies
21# from platform to platform, but on average it's ~5%/15%/25%/33%
22# for 512-/1024-/2048-/4096-bit RSA *sign* benchmarks respectively.
23
24# August 2011.
25#
26# Unroll and modulo-schedule inner loops in such manner that they
27# are "fallen through" for input lengths of 8, which is critical for
28# 1024-bit RSA *sign*. Average performance improvement in comparison
29# to *initial* version of this module from 2005 is ~0%/30%/40%/45%
30# for 512-/1024-/2048-/4096-bit RSA *sign* benchmarks respectively.
31
# Command line: [flavour] output. If the first argument contains a dot
# it is taken to be the output file name and no flavour is used.
32$flavour = shift;
33$output = shift;
34if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
35
# Look for x86_64-xlate.pl next to this script, then under
# ../../perlasm relative to it; give up if neither file exists.
36$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
37( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
38( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
39die "can't locate x86_64-xlate.pl";
40
# Everything printed to STDOUT from here on is piped through the
# translator script, run with the current perl interpreter ($^X).
41open OUT,"| \"$^X\" $xlate $flavour $output";
42*STDOUT=*OUT;
43
# Register assignment: the six bn_mul_mont arguments first, then the
# scratch registers used by the generated code.
44# int bn_mul_mont(
45$rp="%rdi"; # BN_ULONG *rp,
46$ap="%rsi"; # const BN_ULONG *ap,
47$bp="%rdx"; # const BN_ULONG *bp,
48$np="%rcx"; # const BN_ULONG *np,
49$n0="%r8"; # const BN_ULONG *n0,
50$num="%r9"; # int num);
51$lo0="%r10";
52$hi0="%r11";
53$hi1="%r13";
54$i="%r14";
55$j="%r15";
56$m0="%rbx";
57$m1="%rbp";
58
59$code=<<___;
60.text
61
62.globl bn_mul_mont
63.type bn_mul_mont,\@function,6
64.align 16
65bn_mul_mont:
66 _CET_ENDBR
67 test \$3,${num}d
68 jnz .Lmul_enter
69 cmp \$8,${num}d
70 jb .Lmul_enter
71 cmp $ap,$bp
72 jne .Lmul4x_enter
73 jmp .Lsqr4x_enter
74
75.align 16
76.Lmul_enter:
77 push %rbx
78 push %rbp
79 push %r12
80 push %r13
81 push %r14
82 push %r15
83
84 mov ${num}d,${num}d
85 lea 2($num),%r10
86 mov %rsp,%r11
87 neg %r10
88 lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+2))
89 and \$-1024,%rsp # minimize TLB usage
90
91 mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
92.Lmul_body:
93 mov $bp,%r12 # reassign $bp
94___
95 $bp="%r12";
96$code.=<<___;
97 mov ($n0),$n0 # pull n0[0] value
98 mov ($bp),$m0 # m0=bp[0]
99 mov ($ap),%rax
100
101 xor $i,$i # i=0
102 xor $j,$j # j=0
103
104 mov $n0,$m1
105 mulq $m0 # ap[0]*bp[0]
106 mov %rax,$lo0
107 mov ($np),%rax
108
109 imulq $lo0,$m1 # "tp[0]"*n0
110 mov %rdx,$hi0
111
112 mulq $m1 # np[0]*m1
113 add %rax,$lo0 # discarded
114 mov 8($ap),%rax
115 adc \$0,%rdx
116 mov %rdx,$hi1
117
118 lea 1($j),$j # j++
119 jmp .L1st_enter
120
121.align 16
122.L1st:
123 add %rax,$hi1
124 mov ($ap,$j,8),%rax
125 adc \$0,%rdx
126 add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0]
127 mov $lo0,$hi0
128 adc \$0,%rdx
129 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
130 mov %rdx,$hi1
131
132.L1st_enter:
133 mulq $m0 # ap[j]*bp[0]
134 add %rax,$hi0
135 mov ($np,$j,8),%rax
136 adc \$0,%rdx
137 lea 1($j),$j # j++
138 mov %rdx,$lo0
139
140 mulq $m1 # np[j]*m1
141 cmp $num,$j
142 jl .L1st
143
144 add %rax,$hi1
145 mov ($ap),%rax # ap[0]
146 adc \$0,%rdx
147 add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0]
148 adc \$0,%rdx
149 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
150 mov %rdx,$hi1
151 mov $lo0,$hi0
152
153 xor %rdx,%rdx
154 add $hi0,$hi1
155 adc \$0,%rdx
156 mov $hi1,-8(%rsp,$num,8)
157 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
158
159 lea 1($i),$i # i++
160 jmp .Louter
161.align 16
162.Louter:
163 mov ($bp,$i,8),$m0 # m0=bp[i]
164 xor $j,$j # j=0
165 mov $n0,$m1
166 mov (%rsp),$lo0
167 mulq $m0 # ap[0]*bp[i]
168 add %rax,$lo0 # ap[0]*bp[i]+tp[0]
169 mov ($np),%rax
170 adc \$0,%rdx
171
172 imulq $lo0,$m1 # tp[0]*n0
173 mov %rdx,$hi0
174
175 mulq $m1 # np[0]*m1
176 add %rax,$lo0 # discarded
177 mov 8($ap),%rax
178 adc \$0,%rdx
179 mov 8(%rsp),$lo0 # tp[1]
180 mov %rdx,$hi1
181
182 lea 1($j),$j # j++
183 jmp .Linner_enter
184
185.align 16
186.Linner:
187 add %rax,$hi1
188 mov ($ap,$j,8),%rax
189 adc \$0,%rdx
190 add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j]
191 mov (%rsp,$j,8),$lo0
192 adc \$0,%rdx
193 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
194 mov %rdx,$hi1
195
196.Linner_enter:
197 mulq $m0 # ap[j]*bp[i]
198 add %rax,$hi0
199 mov ($np,$j,8),%rax
200 adc \$0,%rdx
201 add $hi0,$lo0 # ap[j]*bp[i]+tp[j]
202 mov %rdx,$hi0
203 adc \$0,$hi0
204 lea 1($j),$j # j++
205
206 mulq $m1 # np[j]*m1
207 cmp $num,$j
208 jl .Linner
209
210 add %rax,$hi1
211 mov ($ap),%rax # ap[0]
212 adc \$0,%rdx
213 add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j]
214 mov (%rsp,$j,8),$lo0
215 adc \$0,%rdx
216 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
217 mov %rdx,$hi1
218
219 xor %rdx,%rdx
220 add $hi0,$hi1
221 adc \$0,%rdx
222 add $lo0,$hi1 # pull upmost overflow bit
223 adc \$0,%rdx
224 mov $hi1,-8(%rsp,$num,8)
225 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
226
227 lea 1($i),$i # i++
228 cmp $num,$i
229 jl .Louter
230
231 xor $i,$i # i=0 and clear CF!
232 mov (%rsp),%rax # tp[0]
233 lea (%rsp),$ap # borrow ap for tp
234 mov $num,$j # j=num
235 jmp .Lsub
236.align 16
237.Lsub: sbb ($np,$i,8),%rax
238 mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
239 mov 8($ap,$i,8),%rax # tp[i+1]
240 lea 1($i),$i # i++
241 dec $j # doesnn't affect CF!
242 jnz .Lsub
243
244 sbb \$0,%rax # handle upmost overflow bit
245 xor $i,$i
246 and %rax,$ap
247 not %rax
248 mov $rp,$np
249 and %rax,$np
250 mov $num,$j # j=num
251 or $np,$ap # ap=borrow?tp:rp
252.align 16
253.Lcopy: # copy or in-place refresh
254 mov ($ap,$i,8),%rax
255 mov $i,(%rsp,$i,8) # zap temporary vector
256 mov %rax,($rp,$i,8) # rp[i]=tp[i]
257 lea 1($i),$i
258 sub \$1,$j
259 jnz .Lcopy
260
261 mov 8(%rsp,$num,8),%rsi # restore %rsp
262 mov \$1,%rax
263 mov (%rsi),%r15
264 mov 8(%rsi),%r14
265 mov 16(%rsi),%r13
266 mov 24(%rsi),%r12
267 mov 32(%rsi),%rbp
268 mov 40(%rsi),%rbx
269 lea 48(%rsi),%rsp
270.Lmul_epilogue:
271 ret
272.size bn_mul_mont,.-bn_mul_mont
273___
274{{{
275my @A=("%r10","%r11");
276my @N=("%r13","%rdi");
277$code.=<<___;
278.type bn_mul4x_mont,\@function,6
279.align 16
280bn_mul4x_mont:
281.Lmul4x_enter:
282 _CET_ENDBR
283 push %rbx
284 push %rbp
285 push %r12
286 push %r13
287 push %r14
288 push %r15
289
290 mov ${num}d,${num}d
291 lea 4($num),%r10
292 mov %rsp,%r11
293 neg %r10
294 lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+4))
295 and \$-1024,%rsp # minimize TLB usage
296
297 mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
298.Lmul4x_body:
299 mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
300 mov %rdx,%r12 # reassign $bp
301___
302 $bp="%r12";
303$code.=<<___;
304 mov ($n0),$n0 # pull n0[0] value
305 mov ($bp),$m0 # m0=bp[0]
306 mov ($ap),%rax
307
308 xor $i,$i # i=0
309 xor $j,$j # j=0
310
311 mov $n0,$m1
312 mulq $m0 # ap[0]*bp[0]
313 mov %rax,$A[0]
314 mov ($np),%rax
315
316 imulq $A[0],$m1 # "tp[0]"*n0
317 mov %rdx,$A[1]
318
319 mulq $m1 # np[0]*m1
320 add %rax,$A[0] # discarded
321 mov 8($ap),%rax
322 adc \$0,%rdx
323 mov %rdx,$N[1]
324
325 mulq $m0
326 add %rax,$A[1]
327 mov 8($np),%rax
328 adc \$0,%rdx
329 mov %rdx,$A[0]
330
331 mulq $m1
332 add %rax,$N[1]
333 mov 16($ap),%rax
334 adc \$0,%rdx
335 add $A[1],$N[1]
336 lea 4($j),$j # j++
337 adc \$0,%rdx
338 mov $N[1],(%rsp)
339 mov %rdx,$N[0]
340 jmp .L1st4x
341.align 16
342.L1st4x:
343 mulq $m0 # ap[j]*bp[0]
344 add %rax,$A[0]
345 mov -16($np,$j,8),%rax
346 adc \$0,%rdx
347 mov %rdx,$A[1]
348
349 mulq $m1 # np[j]*m1
350 add %rax,$N[0]
351 mov -8($ap,$j,8),%rax
352 adc \$0,%rdx
353 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
354 adc \$0,%rdx
355 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
356 mov %rdx,$N[1]
357
358 mulq $m0 # ap[j]*bp[0]
359 add %rax,$A[1]
360 mov -8($np,$j,8),%rax
361 adc \$0,%rdx
362 mov %rdx,$A[0]
363
364 mulq $m1 # np[j]*m1
365 add %rax,$N[1]
366 mov ($ap,$j,8),%rax
367 adc \$0,%rdx
368 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
369 adc \$0,%rdx
370 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
371 mov %rdx,$N[0]
372
373 mulq $m0 # ap[j]*bp[0]
374 add %rax,$A[0]
375 mov ($np,$j,8),%rax
376 adc \$0,%rdx
377 mov %rdx,$A[1]
378
379 mulq $m1 # np[j]*m1
380 add %rax,$N[0]
381 mov 8($ap,$j,8),%rax
382 adc \$0,%rdx
383 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
384 adc \$0,%rdx
385 mov $N[0],-8(%rsp,$j,8) # tp[j-1]
386 mov %rdx,$N[1]
387
388 mulq $m0 # ap[j]*bp[0]
389 add %rax,$A[1]
390 mov 8($np,$j,8),%rax
391 adc \$0,%rdx
392 lea 4($j),$j # j++
393 mov %rdx,$A[0]
394
395 mulq $m1 # np[j]*m1
396 add %rax,$N[1]
397 mov -16($ap,$j,8),%rax
398 adc \$0,%rdx
399 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
400 adc \$0,%rdx
401 mov $N[1],-32(%rsp,$j,8) # tp[j-1]
402 mov %rdx,$N[0]
403 cmp $num,$j
404 jl .L1st4x
405
406 mulq $m0 # ap[j]*bp[0]
407 add %rax,$A[0]
408 mov -16($np,$j,8),%rax
409 adc \$0,%rdx
410 mov %rdx,$A[1]
411
412 mulq $m1 # np[j]*m1
413 add %rax,$N[0]
414 mov -8($ap,$j,8),%rax
415 adc \$0,%rdx
416 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
417 adc \$0,%rdx
418 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
419 mov %rdx,$N[1]
420
421 mulq $m0 # ap[j]*bp[0]
422 add %rax,$A[1]
423 mov -8($np,$j,8),%rax
424 adc \$0,%rdx
425 mov %rdx,$A[0]
426
427 mulq $m1 # np[j]*m1
428 add %rax,$N[1]
429 mov ($ap),%rax # ap[0]
430 adc \$0,%rdx
431 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
432 adc \$0,%rdx
433 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
434 mov %rdx,$N[0]
435
436 xor $N[1],$N[1]
437 add $A[0],$N[0]
438 adc \$0,$N[1]
439 mov $N[0],-8(%rsp,$j,8)
440 mov $N[1],(%rsp,$j,8) # store upmost overflow bit
441
442 lea 1($i),$i # i++
443.align 4
444.Louter4x:
445 mov ($bp,$i,8),$m0 # m0=bp[i]
446 xor $j,$j # j=0
447 mov (%rsp),$A[0]
448 mov $n0,$m1
449 mulq $m0 # ap[0]*bp[i]
450 add %rax,$A[0] # ap[0]*bp[i]+tp[0]
451 mov ($np),%rax
452 adc \$0,%rdx
453
454 imulq $A[0],$m1 # tp[0]*n0
455 mov %rdx,$A[1]
456
457 mulq $m1 # np[0]*m1
458 add %rax,$A[0] # "$N[0]", discarded
459 mov 8($ap),%rax
460 adc \$0,%rdx
461 mov %rdx,$N[1]
462
463 mulq $m0 # ap[j]*bp[i]
464 add %rax,$A[1]
465 mov 8($np),%rax
466 adc \$0,%rdx
467 add 8(%rsp),$A[1] # +tp[1]
468 adc \$0,%rdx
469 mov %rdx,$A[0]
470
471 mulq $m1 # np[j]*m1
472 add %rax,$N[1]
473 mov 16($ap),%rax
474 adc \$0,%rdx
475 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j]
476 lea 4($j),$j # j+=2
477 adc \$0,%rdx
478 mov $N[1],(%rsp) # tp[j-1]
479 mov %rdx,$N[0]
480 jmp .Linner4x
481.align 16
482.Linner4x:
483 mulq $m0 # ap[j]*bp[i]
484 add %rax,$A[0]
485 mov -16($np,$j,8),%rax
486 adc \$0,%rdx
487 add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
488 adc \$0,%rdx
489 mov %rdx,$A[1]
490
491 mulq $m1 # np[j]*m1
492 add %rax,$N[0]
493 mov -8($ap,$j,8),%rax
494 adc \$0,%rdx
495 add $A[0],$N[0]
496 adc \$0,%rdx
497 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
498 mov %rdx,$N[1]
499
500 mulq $m0 # ap[j]*bp[i]
501 add %rax,$A[1]
502 mov -8($np,$j,8),%rax
503 adc \$0,%rdx
504 add -8(%rsp,$j,8),$A[1]
505 adc \$0,%rdx
506 mov %rdx,$A[0]
507
508 mulq $m1 # np[j]*m1
509 add %rax,$N[1]
510 mov ($ap,$j,8),%rax
511 adc \$0,%rdx
512 add $A[1],$N[1]
513 adc \$0,%rdx
514 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
515 mov %rdx,$N[0]
516
517 mulq $m0 # ap[j]*bp[i]
518 add %rax,$A[0]
519 mov ($np,$j,8),%rax
520 adc \$0,%rdx
521 add (%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
522 adc \$0,%rdx
523 mov %rdx,$A[1]
524
525 mulq $m1 # np[j]*m1
526 add %rax,$N[0]
527 mov 8($ap,$j,8),%rax
528 adc \$0,%rdx
529 add $A[0],$N[0]
530 adc \$0,%rdx
531 mov $N[0],-8(%rsp,$j,8) # tp[j-1]
532 mov %rdx,$N[1]
533
534 mulq $m0 # ap[j]*bp[i]
535 add %rax,$A[1]
536 mov 8($np,$j,8),%rax
537 adc \$0,%rdx
538 add 8(%rsp,$j,8),$A[1]
539 adc \$0,%rdx
540 lea 4($j),$j # j++
541 mov %rdx,$A[0]
542
543 mulq $m1 # np[j]*m1
544 add %rax,$N[1]
545 mov -16($ap,$j,8),%rax
546 adc \$0,%rdx
547 add $A[1],$N[1]
548 adc \$0,%rdx
549 mov $N[1],-32(%rsp,$j,8) # tp[j-1]
550 mov %rdx,$N[0]
551 cmp $num,$j
552 jl .Linner4x
553
554 mulq $m0 # ap[j]*bp[i]
555 add %rax,$A[0]
556 mov -16($np,$j,8),%rax
557 adc \$0,%rdx
558 add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
559 adc \$0,%rdx
560 mov %rdx,$A[1]
561
562 mulq $m1 # np[j]*m1
563 add %rax,$N[0]
564 mov -8($ap,$j,8),%rax
565 adc \$0,%rdx
566 add $A[0],$N[0]
567 adc \$0,%rdx
568 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
569 mov %rdx,$N[1]
570
571 mulq $m0 # ap[j]*bp[i]
572 add %rax,$A[1]
573 mov -8($np,$j,8),%rax
574 adc \$0,%rdx
575 add -8(%rsp,$j,8),$A[1]
576 adc \$0,%rdx
577 lea 1($i),$i # i++
578 mov %rdx,$A[0]
579
580 mulq $m1 # np[j]*m1
581 add %rax,$N[1]
582 mov ($ap),%rax # ap[0]
583 adc \$0,%rdx
584 add $A[1],$N[1]
585 adc \$0,%rdx
586 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
587 mov %rdx,$N[0]
588
589 xor $N[1],$N[1]
590 add $A[0],$N[0]
591 adc \$0,$N[1]
592 add (%rsp,$num,8),$N[0] # pull upmost overflow bit
593 adc \$0,$N[1]
594 mov $N[0],-8(%rsp,$j,8)
595 mov $N[1],(%rsp,$j,8) # store upmost overflow bit
596
597 cmp $num,$i
598 jl .Louter4x
599___
600{ # post-condition: rp=tp-np, then branch-free choice between tp and rp, zapping tp on the way
601my @ri=("%rax","%rdx",$m0,$m1); # four scratch registers for the 4x-unrolled subtraction
602$code.=<<___;
603 mov 16(%rsp,$num,8),$rp # restore $rp
604 mov 0(%rsp),@ri[0] # tp[0]
605 pxor %xmm0,%xmm0 # xmm0=0, stored over tp in the copy loop
606 mov 8(%rsp),@ri[1] # tp[1]
607 shr \$2,$num # num/=4
608 lea (%rsp),$ap # borrow ap for tp
609 xor $i,$i # i=0 and clear CF!
610
611 sub 0($np),@ri[0] # tp[0]-np[0], starts the borrow chain
612 mov 16($ap),@ri[2] # tp[2]
613 mov 24($ap),@ri[3] # tp[3]
614 sbb 8($np),@ri[1] # tp[1]-np[1]-borrow
615 lea -1($num),$j # j=num/4-1
616 jmp .Lsub4x
617.align 16
618.Lsub4x:
619 mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i]
620 mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i]
621 sbb 16($np,$i,8),@ri[2]
622 mov 32($ap,$i,8),@ri[0] # tp[i+4]
623 mov 40($ap,$i,8),@ri[1]
624 sbb 24($np,$i,8),@ri[3]
625 mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i]
626 mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i]
627 sbb 32($np,$i,8),@ri[0]
628 mov 48($ap,$i,8),@ri[2]
629 mov 56($ap,$i,8),@ri[3]
630 sbb 40($np,$i,8),@ri[1]
631 lea 4($i),$i # i+=4, lea leaves CF untouched
632 dec $j # doesn't affect CF!
633 jnz .Lsub4x
634
635 mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i]
636 mov 32($ap,$i,8),@ri[0] # load overflow bit
637 sbb 16($np,$i,8),@ri[2]
638 mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i]
639 sbb 24($np,$i,8),@ri[3]
640 mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i]
641
642 sbb \$0,@ri[0] # handle upmost overflow bit
643 mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i]
644 xor $i,$i # i=0
645 and @ri[0],$ap # ap=tp&mask
646 not @ri[0]
647 mov $rp,$np
648 and @ri[0],$np # np=rp&~mask
649 lea -1($num),$j # j=num/4-1
650 or $np,$ap # ap=borrow?tp:rp
651
652 movdqu ($ap),%xmm1
653 movdqa %xmm0,(%rsp) # zap tp[0..1]
654 movdqu %xmm1,($rp)
655 jmp .Lcopy4x
656.align 16
657.Lcopy4x: # copy or in-place refresh
658 movdqu 16($ap,$i),%xmm2
659 movdqu 32($ap,$i),%xmm1
660 movdqa %xmm0,16(%rsp,$i)
661 movdqu %xmm2,16($rp,$i)
662 movdqa %xmm0,32(%rsp,$i)
663 movdqu %xmm1,32($rp,$i)
664 lea 32($i),$i # i+=32 bytes
665 dec $j
666 jnz .Lcopy4x
667
668 shl \$2,$num # num*=4, undoes the shr above
669 movdqu 16($ap,$i),%xmm2
670 movdqa %xmm0,16(%rsp,$i)
671 movdqu %xmm2,16($rp,$i)
672___
673}
674$code.=<<___;
675 mov 8(%rsp,$num,8),%rsi # restore %rsp
676 mov \$1,%rax
677 mov (%rsi),%r15
678 mov 8(%rsi),%r14
679 mov 16(%rsi),%r13
680 mov 24(%rsi),%r12
681 mov 32(%rsi),%rbp
682 mov 40(%rsi),%rbx
683 lea 48(%rsi),%rsp
684.Lmul4x_epilogue:
685 ret
686.size bn_mul4x_mont,.-bn_mul4x_mont
687___
688}}}
689 {{{
690######################################################################
691# void bn_sqr4x_mont(
692my $rptr="%rdi"; # BN_ULONG *rptr, result, written by the post-condition code
693my $aptr="%rsi"; # const BN_ULONG *aptr,
694my $bptr="%rdx"; # not used
695my $nptr="%rcx"; # const BN_ULONG *nptr,
696my $n0 ="%r8"; # const BN_ULONG *n0);
697my $num ="%r9"; # int num, has to be divisible by 4 and
698 # not less than 8
699
700my ($i,$j,$tptr)=("%rbp","%rcx",$rptr); # $j shares %rcx with $nptr, $tptr shares %rdi with $rptr
701my @A0=("%r10","%r11"); # accumulator pair for the products by $a0
702my @A1=("%r12","%r13"); # accumulator pair for the products by $a1
703my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); # two fixed multiplicands and the current a[] word
704
705$code.=<<___; # prologue, frame setup and step a): all a[i]*a[j] products with i!=j
706.type bn_sqr4x_mont,\@function,6
707.align 16
708bn_sqr4x_mont:
709.Lsqr4x_enter:
710 _CET_ENDBR
711 push %rbx # the six registers pushed here are reloaded in the epilogue
712 push %rbp
713 push %r12
714 push %r13
715 push %r14
716 push %r15
717
718 shl \$3,${num}d # convert $num to bytes
719 xor %r10,%r10
720 mov %rsp,%r11 # put aside %rsp
721 sub $num,%r10 # -$num
722 mov ($n0),$n0 # *n0
723 lea -72(%rsp,%r10,2),%rsp # alloca(frame+2*$num)
724 and \$-1024,%rsp # minimize TLB usage
725 ##############################################################
726 # Stack layout
727 #
728 # +0 saved $num, used in reduction section
729 # +8 &t[2*$num], used in reduction section
730 # +32 saved $rptr
731 # +40 saved $nptr
732 # +48 saved *n0
733 # +56 saved %rsp
734 # +64 t[2*$num]
735 #
736 mov $rptr,32(%rsp) # save $rptr
737 mov $nptr,40(%rsp) # save $nptr
738 mov $n0, 48(%rsp) # save *n0
739 mov %r11, 56(%rsp) # save original %rsp
740.Lsqr4x_body:
741 ##############################################################
742 # Squaring part:
743 #
744 # a) multiply-n-add everything but a[i]*a[i];
745 # b) shift result of a) by 1 to the left and accumulate
746 # a[i]*a[i] products;
747 #
748 lea 32(%r10),$i # $i=-($num-32)
749 lea ($aptr,$num),$aptr # end of a[] buffer, ($aptr,$i)=&ap[2]
750
751 mov $num,$j # $j=$num
752
753 # comments apply to $num==8 case
754 mov -32($aptr,$i),$a0 # a[0]
755 lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
756 mov -24($aptr,$i),%rax # a[1]
757 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
758 mov -16($aptr,$i),$ai # a[2]
759 mov %rax,$a1
760
761 mul $a0 # a[1]*a[0]
762 mov %rax,$A0[0] # a[1]*a[0]
763 mov $ai,%rax # a[2]
764 mov %rdx,$A0[1]
765 mov $A0[0],-24($tptr,$i) # t[1]
766
767 xor $A0[0],$A0[0]
768 mul $a0 # a[2]*a[0]
769 add %rax,$A0[1]
770 mov $ai,%rax
771 adc %rdx,$A0[0]
772 mov $A0[1],-16($tptr,$i) # t[2]
773
774 lea -16($i),$j # j=i-16
775
776
777 mov 8($aptr,$j),$ai # a[3]
778 mul $a1 # a[2]*a[1]
779 mov %rax,$A1[0] # a[2]*a[1]
780 mov $ai,%rax
781 mov %rdx,$A1[1]
782
783 xor $A0[1],$A0[1]
784 add $A1[0],$A0[0]
785 lea 16($j),$j # j+=16
786 adc \$0,$A0[1]
787 mul $a0 # a[3]*a[0]
788 add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3]
789 mov $ai,%rax
790 adc %rdx,$A0[1]
791 mov $A0[0],-8($tptr,$j) # t[3]
792 jmp .Lsqr4x_1st
793
794.align 16
795.Lsqr4x_1st:
796 mov ($aptr,$j),$ai # a[4]
797 xor $A1[0],$A1[0]
798 mul $a1 # a[3]*a[1]
799 add %rax,$A1[1] # a[3]*a[1]+t[4]
800 mov $ai,%rax
801 adc %rdx,$A1[0]
802
803 xor $A0[0],$A0[0]
804 add $A1[1],$A0[1]
805 adc \$0,$A0[0]
806 mul $a0 # a[4]*a[0]
807 add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4]
808 mov $ai,%rax # a[4]
809 adc %rdx,$A0[0]
810 mov $A0[1],($tptr,$j) # t[4]
811
812
813 mov 8($aptr,$j),$ai # a[5]
814 xor $A1[1],$A1[1]
815 mul $a1 # a[4]*a[1]
816 add %rax,$A1[0] # a[4]*a[1]+t[5]
817 mov $ai,%rax
818 adc %rdx,$A1[1]
819
820 xor $A0[1],$A0[1]
821 add $A1[0],$A0[0]
822 adc \$0,$A0[1]
823 mul $a0 # a[5]*a[0]
824 add %rax,$A0[0] # a[5]*a[0]+a[4]*a[1]+t[5]
825 mov $ai,%rax
826 adc %rdx,$A0[1]
827 mov $A0[0],8($tptr,$j) # t[5]
828
829 mov 16($aptr,$j),$ai # a[6]
830 xor $A1[0],$A1[0]
831 mul $a1 # a[5]*a[1]
832 add %rax,$A1[1] # a[5]*a[1]+t[6]
833 mov $ai,%rax
834 adc %rdx,$A1[0]
835
836 xor $A0[0],$A0[0]
837 add $A1[1],$A0[1]
838 adc \$0,$A0[0]
839 mul $a0 # a[6]*a[0]
840 add %rax,$A0[1] # a[6]*a[0]+a[5]*a[1]+t[6]
841 mov $ai,%rax # a[6]
842 adc %rdx,$A0[0]
843 mov $A0[1],16($tptr,$j) # t[6]
844
845
846 mov 24($aptr,$j),$ai # a[7]
847 xor $A1[1],$A1[1]
848 mul $a1 # a[6]*a[1]
849 add %rax,$A1[0] # a[6]*a[1]+t[7]
850 mov $ai,%rax
851 adc %rdx,$A1[1]
852
853 xor $A0[1],$A0[1]
854 add $A1[0],$A0[0]
855 lea 32($j),$j # j+=32
856 adc \$0,$A0[1]
857 mul $a0 # a[7]*a[0]
858 add %rax,$A0[0] # a[7]*a[0]+a[6]*a[1]+t[7]
859 mov $ai,%rax
860 adc %rdx,$A0[1]
861 mov $A0[0],-8($tptr,$j) # t[7]
862
863 cmp \$0,$j
864 jne .Lsqr4x_1st
865
866 xor $A1[0],$A1[0]
867 add $A0[1],$A1[1]
868 adc \$0,$A1[0]
869 mul $a1 # a[7]*a[1]
870 add %rax,$A1[1]
871 adc %rdx,$A1[0]
872
873 mov $A1[1],($tptr) # t[8]
874 lea 16($i),$i # i+=16
875 mov $A1[0],8($tptr) # t[9]
876 jmp .Lsqr4x_outer
877
878.align 16
879.Lsqr4x_outer: # comments apply to $num==6 case
880 mov -32($aptr,$i),$a0 # a[0]
881 lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
882 mov -24($aptr,$i),%rax # a[1]
883 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
884 mov -16($aptr,$i),$ai # a[2]
885 mov %rax,$a1
886
887 mov -24($tptr,$i),$A0[0] # t[1]
888 xor $A0[1],$A0[1]
889 mul $a0 # a[1]*a[0]
890 add %rax,$A0[0] # a[1]*a[0]+t[1]
891 mov $ai,%rax # a[2]
892 adc %rdx,$A0[1]
893 mov $A0[0],-24($tptr,$i) # t[1]
894
895 xor $A0[0],$A0[0]
896 add -16($tptr,$i),$A0[1] # a[2]*a[0]+t[2]
897 adc \$0,$A0[0]
898 mul $a0 # a[2]*a[0]
899 add %rax,$A0[1]
900 mov $ai,%rax
901 adc %rdx,$A0[0]
902 mov $A0[1],-16($tptr,$i) # t[2]
903
904 lea -16($i),$j # j=i-16
905 xor $A1[0],$A1[0]
906
907
908 mov 8($aptr,$j),$ai # a[3]
909 xor $A1[1],$A1[1]
910 add 8($tptr,$j),$A1[0]
911 adc \$0,$A1[1]
912 mul $a1 # a[2]*a[1]
913 add %rax,$A1[0] # a[2]*a[1]+t[3]
914 mov $ai,%rax
915 adc %rdx,$A1[1]
916
917 xor $A0[1],$A0[1]
918 add $A1[0],$A0[0]
919 adc \$0,$A0[1]
920 mul $a0 # a[3]*a[0]
921 add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3]
922 mov $ai,%rax
923 adc %rdx,$A0[1]
924 mov $A0[0],8($tptr,$j) # t[3]
925
926 lea 16($j),$j
927 jmp .Lsqr4x_inner
928
929.align 16
930.Lsqr4x_inner:
931 mov ($aptr,$j),$ai # a[4]
932 xor $A1[0],$A1[0]
933 add ($tptr,$j),$A1[1]
934 adc \$0,$A1[0]
935 mul $a1 # a[3]*a[1]
936 add %rax,$A1[1] # a[3]*a[1]+t[4]
937 mov $ai,%rax
938 adc %rdx,$A1[0]
939
940 xor $A0[0],$A0[0]
941 add $A1[1],$A0[1]
942 adc \$0,$A0[0]
943 mul $a0 # a[4]*a[0]
944 add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4]
945 mov $ai,%rax # a[4]
946 adc %rdx,$A0[0]
947 mov $A0[1],($tptr,$j) # t[4]
948
949 mov 8($aptr,$j),$ai # a[5]
950 xor $A1[1],$A1[1]
951 add 8($tptr,$j),$A1[0]
952 adc \$0,$A1[1]
953 mul $a1 # a[4]*a[1]
954 add %rax,$A1[0] # a[4]*a[1]+t[5]
955 mov $ai,%rax
956 adc %rdx,$A1[1]
957
958 xor $A0[1],$A0[1]
959 add $A1[0],$A0[0]
960 lea 16($j),$j # j+=16
961 adc \$0,$A0[1]
962 mul $a0 # a[5]*a[0]
963 add %rax,$A0[0] # a[5]*a[0]+a[4]*a[1]+t[5]
964 mov $ai,%rax
965 adc %rdx,$A0[1]
966 mov $A0[0],-8($tptr,$j) # t[5], "preloaded t[1]" below
967
968 cmp \$0,$j
969 jne .Lsqr4x_inner
970
971 xor $A1[0],$A1[0]
972 add $A0[1],$A1[1]
973 adc \$0,$A1[0]
974 mul $a1 # a[5]*a[1]
975 add %rax,$A1[1]
976 adc %rdx,$A1[0]
977
978 mov $A1[1],($tptr) # t[6], "preloaded t[2]" below
979 mov $A1[0],8($tptr) # t[7], "preloaded t[3]" below
980
981 add \$16,$i
982 jnz .Lsqr4x_outer
983
984 # comments apply to $num==4 case
985 mov -32($aptr),$a0 # a[0]
986 lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
987 mov -24($aptr),%rax # a[1]
988 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
989 mov -16($aptr),$ai # a[2]
990 mov %rax,$a1
991
992 xor $A0[1],$A0[1]
993 mul $a0 # a[1]*a[0]
994 add %rax,$A0[0] # a[1]*a[0]+t[1], preloaded t[1]
995 mov $ai,%rax # a[2]
996 adc %rdx,$A0[1]
997 mov $A0[0],-24($tptr) # t[1]
998
999 xor $A0[0],$A0[0]
1000 add $A1[1],$A0[1] # a[2]*a[0]+t[2], preloaded t[2]
1001 adc \$0,$A0[0]
1002 mul $a0 # a[2]*a[0]
1003 add %rax,$A0[1]
1004 mov $ai,%rax
1005 adc %rdx,$A0[0]
1006 mov $A0[1],-16($tptr) # t[2]
1007
1008 mov -8($aptr),$ai # a[3]
1009 mul $a1 # a[2]*a[1]
1010 add %rax,$A1[0] # a[2]*a[1]+t[3], preloaded t[3]
1011 mov $ai,%rax
1012 adc \$0,%rdx
1013
1014 xor $A0[1],$A0[1]
1015 add $A1[0],$A0[0]
1016 mov %rdx,$A1[1]
1017 adc \$0,$A0[1]
1018 mul $a0 # a[3]*a[0]
1019 add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3]
1020 mov $ai,%rax
1021 adc %rdx,$A0[1]
1022 mov $A0[0],-8($tptr) # t[3]
1023
1024 xor $A1[0],$A1[0]
1025 add $A0[1],$A1[1]
1026 adc \$0,$A1[0]
1027 mul $a1 # a[3]*a[1]
1028 add %rax,$A1[1]
1029 mov -16($aptr),%rax # a[2]
1030 adc %rdx,$A1[0]
1031
1032 mov $A1[1],($tptr) # t[4]
1033 mov $A1[0],8($tptr) # t[5]
1034
1035 mul $ai # a[2]*a[3]
1036___
1037{ # step b) of the squaring: shift the cross-product sum left by one bit and add the a[i]*a[i] squares
1038my ($shift,$carry)=($a0,$a1); # bit shifted out of the previous word / saved carry flag
1039my @S=(@A1,$ai,$n0); # shifted words; reuses $n0, whose value was saved at 48(%rsp)
1040$code.=<<___;
1041 add \$16,$i
1042 xor $shift,$shift
1043 sub $num,$i # $i=16-$num
1044 xor $carry,$carry
1045
1046 add $A1[0],%rax # t[5]
1047 adc \$0,%rdx
1048 mov %rax,8($tptr) # t[5]
1049 mov %rdx,16($tptr) # t[6]
1050 mov $carry,24($tptr) # t[7]
1051
1052 mov -16($aptr,$i),%rax # a[0]
1053 lea 64(%rsp,$num,2),$tptr
1054 xor $A0[0],$A0[0] # t[0]
1055 mov -24($tptr,$i,2),$A0[1] # t[1]
1056
1057 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1058 shr \$63,$A0[0]
1059 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | ($j is 0 after the multiply loops, used as a zero base)
1060 shr \$63,$A0[1]
1061 or $A0[0],$S[1] # | t[2*i]>>63
1062 mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1063 mov $A0[1],$shift # shift=t[2*i+1]>>63
1064 mul %rax # a[i]*a[i]
1065 neg $carry # mov $carry,cf
1066 mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1067 adc %rax,$S[0]
1068 mov -8($aptr,$i),%rax # a[i+1] # prefetch
1069 mov $S[0],-32($tptr,$i,2)
1070 adc %rdx,$S[1]
1071
1072 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
1073 mov $S[1],-24($tptr,$i,2)
1074 sbb $carry,$carry # mov cf,$carry
1075 shr \$63,$A0[0]
1076 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1077 shr \$63,$A0[1]
1078 or $A0[0],$S[3] # | t[2*i]>>63
1079 mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1080 mov $A0[1],$shift # shift=t[2*i+1]>>63
1081 mul %rax # a[i]*a[i]
1082 neg $carry # mov $carry,cf
1083 mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1084 adc %rax,$S[2]
1085 mov 0($aptr,$i),%rax # a[i+1] # prefetch
1086 mov $S[2],-16($tptr,$i,2)
1087 adc %rdx,$S[3]
1088 lea 16($i),$i # i+=16
1089 mov $S[3],-40($tptr,$i,2) # same slot as offset -8 before the i+=16 above
1090 sbb $carry,$carry # mov cf,$carry
1091 jmp .Lsqr4x_shift_n_add
1092
1093.align 16
1094.Lsqr4x_shift_n_add:
1095 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1096 shr \$63,$A0[0]
1097 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1098 shr \$63,$A0[1]
1099 or $A0[0],$S[1] # | t[2*i]>>63
1100 mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1101 mov $A0[1],$shift # shift=t[2*i+1]>>63
1102 mul %rax # a[i]*a[i]
1103 neg $carry # mov $carry,cf
1104 mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1105 adc %rax,$S[0]
1106 mov -8($aptr,$i),%rax # a[i+1] # prefetch
1107 mov $S[0],-32($tptr,$i,2)
1108 adc %rdx,$S[1]
1109
1110 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
1111 mov $S[1],-24($tptr,$i,2)
1112 sbb $carry,$carry # mov cf,$carry
1113 shr \$63,$A0[0]
1114 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1115 shr \$63,$A0[1]
1116 or $A0[0],$S[3] # | t[2*i]>>63
1117 mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1118 mov $A0[1],$shift # shift=t[2*i+1]>>63
1119 mul %rax # a[i]*a[i]
1120 neg $carry # mov $carry,cf
1121 mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1122 adc %rax,$S[2]
1123 mov 0($aptr,$i),%rax # a[i+1] # prefetch
1124 mov $S[2],-16($tptr,$i,2)
1125 adc %rdx,$S[3]
1126
1127 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1128 mov $S[3],-8($tptr,$i,2)
1129 sbb $carry,$carry # mov cf,$carry
1130 shr \$63,$A0[0]
1131 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1132 shr \$63,$A0[1]
1133 or $A0[0],$S[1] # | t[2*i]>>63
1134 mov 16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1135 mov $A0[1],$shift # shift=t[2*i+1]>>63
1136 mul %rax # a[i]*a[i]
1137 neg $carry # mov $carry,cf
1138 mov 24($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1139 adc %rax,$S[0]
1140 mov 8($aptr,$i),%rax # a[i+1] # prefetch
1141 mov $S[0],0($tptr,$i,2)
1142 adc %rdx,$S[1]
1143
1144 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
1145 mov $S[1],8($tptr,$i,2)
1146 sbb $carry,$carry # mov cf,$carry
1147 shr \$63,$A0[0]
1148 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1149 shr \$63,$A0[1]
1150 or $A0[0],$S[3] # | t[2*i]>>63
1151 mov 32($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1152 mov $A0[1],$shift # shift=t[2*i+1]>>63
1153 mul %rax # a[i]*a[i]
1154 neg $carry # mov $carry,cf
1155 mov 40($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1156 adc %rax,$S[2]
1157 mov 16($aptr,$i),%rax # a[i+1] # prefetch
1158 mov $S[2],16($tptr,$i,2)
1159 adc %rdx,$S[3]
1160 mov $S[3],24($tptr,$i,2)
1161 sbb $carry,$carry # mov cf,$carry
1162 add \$32,$i # i+=32, the loop ends when i reaches 0
1163 jnz .Lsqr4x_shift_n_add
1164
1165 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1166 shr \$63,$A0[0]
1167 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1168 shr \$63,$A0[1]
1169 or $A0[0],$S[1] # | t[2*i]>>63
1170 mov -16($tptr),$A0[0] # t[2*i+2] # prefetch
1171 mov $A0[1],$shift # shift=t[2*i+1]>>63
1172 mul %rax # a[i]*a[i]
1173 neg $carry # mov $carry,cf
1174 mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch
1175 adc %rax,$S[0]
1176 mov -8($aptr),%rax # a[i+1] # prefetch
1177 mov $S[0],-32($tptr)
1178 adc %rdx,$S[1]
1179
1180 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1|shift
1181 mov $S[1],-24($tptr)
1182 sbb $carry,$carry # mov cf,$carry
1183 shr \$63,$A0[0]
1184 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1185 shr \$63,$A0[1]
1186 or $A0[0],$S[3] # | t[2*i]>>63
1187 mul %rax # a[i]*a[i]
1188 neg $carry # mov $carry,cf
1189 adc %rax,$S[2]
1190 adc %rdx,$S[3]
1191 mov $S[2],-16($tptr)
1192 mov $S[3],-8($tptr)
1193___
1194}
1195##############################################################
1196# Montgomery reduction part, "word-by-word" algorithm.
1197#
1198{ # t[] is reduced in place; every outer pass derives two multipliers (m0, m1) and advances the window by two words
1199my ($topbit,$nptr)=("%rbp",$aptr); # $nptr now lives in %rsi; %rbp carries the overflow bit between passes
1200my ($m0,$m1)=($a0,$a1);
1201my @Ni=("%rbx","%r9"); # n[] words; %r9 is also $num, which is why $num is parked at 0(%rsp)
1202$code.=<<___;
1203 mov 40(%rsp),$nptr # restore $nptr
1204 mov 48(%rsp),$n0 # restore *n0
1205 xor $j,$j
1206 mov $num,0(%rsp) # save $num
1207 sub $num,$j # $j=-$num
1208 mov 64(%rsp),$A0[0] # t[0] # modsched #
1209 mov $n0,$m0 # # modsched #
1210 lea 64(%rsp,$num,2),%rax # end of t[] buffer
1211 lea 64(%rsp,$num),$tptr # end of t[] window
1212 mov %rax,8(%rsp) # save end of t[] buffer
1213 lea ($nptr,$num),$nptr # end of n[] buffer
1214 xor $topbit,$topbit # $topbit=0
1215
1216 mov 0($nptr,$j),%rax # n[0] # modsched #
1217 mov 8($nptr,$j),$Ni[1] # n[1] # modsched #
1218 imulq $A0[0],$m0 # m0=t[0]*n0 # modsched #
1219 mov %rax,$Ni[0] # # modsched #
1220 jmp .Lsqr4x_mont_outer
1221
1222.align 16
1223.Lsqr4x_mont_outer:
1224 xor $A0[1],$A0[1]
1225 mul $m0 # n[0]*m0
1226 add %rax,$A0[0] # n[0]*m0+t[0]
1227 mov $Ni[1],%rax
1228 adc %rdx,$A0[1]
1229 mov $n0,$m1
1230
1231 xor $A0[0],$A0[0]
1232 add 8($tptr,$j),$A0[1]
1233 adc \$0,$A0[0]
1234 mul $m0 # n[1]*m0
1235 add %rax,$A0[1] # n[1]*m0+t[1]
1236 mov $Ni[0],%rax
1237 adc %rdx,$A0[0]
1238
1239 imulq $A0[1],$m1 # m1="t[1]"*n0
1240
1241 mov 16($nptr,$j),$Ni[0] # n[2]
1242 xor $A1[1],$A1[1]
1243 add $A0[1],$A1[0]
1244 adc \$0,$A1[1]
1245 mul $m1 # n[0]*m1
1246 add %rax,$A1[0] # n[0]*m1+"t[1]"
1247 mov $Ni[0],%rax
1248 adc %rdx,$A1[1]
1249 mov $A1[0],8($tptr,$j) # "t[1]"
1250
1251 xor $A0[1],$A0[1]
1252 add 16($tptr,$j),$A0[0]
1253 adc \$0,$A0[1]
1254 mul $m0 # n[2]*m0
1255 add %rax,$A0[0] # n[2]*m0+t[2]
1256 mov $Ni[1],%rax
1257 adc %rdx,$A0[1]
1258
1259 mov 24($nptr,$j),$Ni[1] # n[3]
1260 xor $A1[0],$A1[0]
1261 add $A0[0],$A1[1]
1262 adc \$0,$A1[0]
1263 mul $m1 # n[1]*m1
1264 add %rax,$A1[1] # n[1]*m1+"t[2]"
1265 mov $Ni[1],%rax
1266 adc %rdx,$A1[0]
1267 mov $A1[1],16($tptr,$j) # "t[2]"
1268
1269 xor $A0[0],$A0[0]
1270 add 24($tptr,$j),$A0[1]
1271 lea 32($j),$j # j+=32
1272 adc \$0,$A0[0]
1273 mul $m0 # n[3]*m0
1274 add %rax,$A0[1] # n[3]*m0+t[3]
1275 mov $Ni[0],%rax
1276 adc %rdx,$A0[0]
1277 jmp .Lsqr4x_mont_inner
1278
1279.align 16
1280.Lsqr4x_mont_inner:
1281 mov ($nptr,$j),$Ni[0] # n[4]
1282 xor $A1[1],$A1[1]
1283 add $A0[1],$A1[0]
1284 adc \$0,$A1[1]
1285 mul $m1 # n[2]*m1
1286 add %rax,$A1[0] # n[2]*m1+"t[3]"
1287 mov $Ni[0],%rax
1288 adc %rdx,$A1[1]
1289 mov $A1[0],-8($tptr,$j) # "t[3]"
1290
1291 xor $A0[1],$A0[1]
1292 add ($tptr,$j),$A0[0]
1293 adc \$0,$A0[1]
1294 mul $m0 # n[4]*m0
1295 add %rax,$A0[0] # n[4]*m0+t[4]
1296 mov $Ni[1],%rax
1297 adc %rdx,$A0[1]
1298
1299 mov 8($nptr,$j),$Ni[1] # n[5]
1300 xor $A1[0],$A1[0]
1301 add $A0[0],$A1[1]
1302 adc \$0,$A1[0]
1303 mul $m1 # n[3]*m1
1304 add %rax,$A1[1] # n[3]*m1+"t[4]"
1305 mov $Ni[1],%rax
1306 adc %rdx,$A1[0]
1307 mov $A1[1],($tptr,$j) # "t[4]"
1308
1309 xor $A0[0],$A0[0]
1310 add 8($tptr,$j),$A0[1]
1311 adc \$0,$A0[0]
1312 mul $m0 # n[5]*m0
1313 add %rax,$A0[1] # n[5]*m0+t[5]
1314 mov $Ni[0],%rax
1315 adc %rdx,$A0[0]
1316
1317
1318 mov 16($nptr,$j),$Ni[0] # n[6]
1319 xor $A1[1],$A1[1]
1320 add $A0[1],$A1[0]
1321 adc \$0,$A1[1]
1322 mul $m1 # n[4]*m1
1323 add %rax,$A1[0] # n[4]*m1+"t[5]"
1324 mov $Ni[0],%rax
1325 adc %rdx,$A1[1]
1326 mov $A1[0],8($tptr,$j) # "t[5]"
1327
1328 xor $A0[1],$A0[1]
1329 add 16($tptr,$j),$A0[0]
1330 adc \$0,$A0[1]
1331 mul $m0 # n[6]*m0
1332 add %rax,$A0[0] # n[6]*m0+t[6]
1333 mov $Ni[1],%rax
1334 adc %rdx,$A0[1]
1335
1336 mov 24($nptr,$j),$Ni[1] # n[7]
1337 xor $A1[0],$A1[0]
1338 add $A0[0],$A1[1]
1339 adc \$0,$A1[0]
1340 mul $m1 # n[5]*m1
1341 add %rax,$A1[1] # n[5]*m1+"t[6]"
1342 mov $Ni[1],%rax
1343 adc %rdx,$A1[0]
1344 mov $A1[1],16($tptr,$j) # "t[6]"
1345
1346 xor $A0[0],$A0[0]
1347 add 24($tptr,$j),$A0[1]
1348 lea 32($j),$j # j+=32
1349 adc \$0,$A0[0]
1350 mul $m0 # n[7]*m0
1351 add %rax,$A0[1] # n[7]*m0+t[7]
1352 mov $Ni[0],%rax
1353 adc %rdx,$A0[0]
1354 cmp \$0,$j
1355 jne .Lsqr4x_mont_inner
1356
1357 sub 0(%rsp),$j # $j=-$num # modsched #
1358 mov $n0,$m0 # # modsched #
1359
1360 xor $A1[1],$A1[1]
1361 add $A0[1],$A1[0]
1362 adc \$0,$A1[1]
1363 mul $m1 # n[6]*m1
1364 add %rax,$A1[0] # n[6]*m1+"t[7]"
1365 mov $Ni[1],%rax
1366 adc %rdx,$A1[1]
1367 mov $A1[0],-8($tptr) # "t[7]"
1368
1369 xor $A0[1],$A0[1]
1370 add ($tptr),$A0[0] # +t[8]
1371 adc \$0,$A0[1]
1372 mov 0($nptr,$j),$Ni[0] # n[0] # modsched #
1373 add $topbit,$A0[0]
1374 adc \$0,$A0[1]
1375
1376 imulq 16($tptr,$j),$m0 # m0=t[0]*n0 # modsched #
1377 xor $A1[0],$A1[0]
1378 mov 8($nptr,$j),$Ni[1] # n[1] # modsched #
1379 add $A0[0],$A1[1]
1380 mov 16($tptr,$j),$A0[0] # t[0] # modsched #
1381 adc \$0,$A1[0]
1382 mul $m1 # n[7]*m1
1383 add %rax,$A1[1] # n[7]*m1+"t[8]"
1384 mov $Ni[0],%rax # # modsched #
1385 adc %rdx,$A1[0]
1386 mov $A1[1],($tptr) # "t[8]"
1387
1388 xor $topbit,$topbit
1389 add 8($tptr),$A1[0] # +t[9]
1390 adc $topbit,$topbit
1391 add $A0[1],$A1[0]
1392 lea 16($tptr),$tptr # "t[$num]>>128"
1393 adc \$0,$topbit
1394 mov $A1[0],-8($tptr) # "t[9]"
1395 cmp 8(%rsp),$tptr # are we done?
1396 jb .Lsqr4x_mont_outer
1397
1398 mov 0(%rsp),$num # restore $num
1399 mov $topbit,($tptr) # save $topbit
1400___
1401}
1402##############################################################
1403# Post-condition, 4x unrolled copy from bn_mul_mont
1404#
1405{ # rp=t-n is computed unconditionally, then tp or rp is picked by mask, copied to rp, and t[] is overwritten with zeros
1406my ($tptr,$nptr)=("%rbx",$aptr); # $tptr moves to %rbx because %rdi is reloaded with $rptr below
1407my @ri=("%rax","%rdx","%r10","%r11");
1408$code.=<<___;
1409 mov 64(%rsp,$num),@ri[0] # tp[0]
1410 lea 64(%rsp,$num),$tptr # upper half of t[2*$num] holds result
1411 mov 40(%rsp),$nptr # restore $nptr
1412 shr \$5,$num # num/4 ($num was in bytes: bytes/32 = words/4)
1413 mov 8($tptr),@ri[1] # t[1]
1414 xor $i,$i # i=0 and clear CF!
1415
1416 mov 32(%rsp),$rptr # restore $rptr
1417 sub 0($nptr),@ri[0]
1418 mov 16($tptr),@ri[2] # t[2]
1419 mov 24($tptr),@ri[3] # t[3]
1420 sbb 8($nptr),@ri[1]
1421 lea -1($num),$j # j=num/4-1
1422 jmp .Lsqr4x_sub
1423.align 16
1424.Lsqr4x_sub:
1425 mov @ri[0],0($rptr,$i,8) # rp[i]=tp[i]-np[i]
1426 mov @ri[1],8($rptr,$i,8) # rp[i]=tp[i]-np[i]
1427 sbb 16($nptr,$i,8),@ri[2]
1428 mov 32($tptr,$i,8),@ri[0] # tp[i+4]
1429 mov 40($tptr,$i,8),@ri[1]
1430 sbb 24($nptr,$i,8),@ri[3]
1431 mov @ri[2],16($rptr,$i,8) # rp[i]=tp[i]-np[i]
1432 mov @ri[3],24($rptr,$i,8) # rp[i]=tp[i]-np[i]
1433 sbb 32($nptr,$i,8),@ri[0]
1434 mov 48($tptr,$i,8),@ri[2]
1435 mov 56($tptr,$i,8),@ri[3]
1436 sbb 40($nptr,$i,8),@ri[1]
1437 lea 4($i),$i # i+=4, lea leaves CF untouched
1438 dec $j # doesn't affect CF!
1439 jnz .Lsqr4x_sub
1440
1441 mov @ri[0],0($rptr,$i,8) # rp[i]=tp[i]-np[i]
1442 mov 32($tptr,$i,8),@ri[0] # load overflow bit
1443 sbb 16($nptr,$i,8),@ri[2]
1444 mov @ri[1],8($rptr,$i,8) # rp[i]=tp[i]-np[i]
1445 sbb 24($nptr,$i,8),@ri[3]
1446 mov @ri[2],16($rptr,$i,8) # rp[i]=tp[i]-np[i]
1447
1448 sbb \$0,@ri[0] # handle upmost overflow bit
1449 mov @ri[3],24($rptr,$i,8) # rp[i]=tp[i]-np[i]
1450 xor $i,$i # i=0
1451 and @ri[0],$tptr
1452 not @ri[0]
1453 mov $rptr,$nptr
1454 and @ri[0],$nptr
1455 lea -1($num),$j # j=num/4-1
1456 or $nptr,$tptr # tp=borrow?tp:rp
1457
1458 pxor %xmm0,%xmm0
1459 lea 64(%rsp,$num,8),$nptr # NOTE(review): $num is words/4 here, so this and the lea below give 64+words*4 bytes, not the start of the upper half - confirm the wipe range
1460 movdqu ($tptr),%xmm1
1461 lea ($nptr,$num,8),$nptr
1462 movdqa %xmm0,64(%rsp) # zap lower half of temporary vector
1463 movdqa %xmm0,($nptr) # zap upper half of temporary vector
1464 movdqu %xmm1,($rptr)
1465 jmp .Lsqr4x_copy
1466.align 16
1467.Lsqr4x_copy: # copy or in-place refresh
1468 movdqu 16($tptr,$i),%xmm2
1469 movdqu 32($tptr,$i),%xmm1
1470 movdqa %xmm0,80(%rsp,$i) # zap lower half of temporary vector
1471 movdqa %xmm0,96(%rsp,$i) # zap lower half of temporary vector
1472 movdqa %xmm0,16($nptr,$i) # zap upper half of temporary vector
1473 movdqa %xmm0,32($nptr,$i) # zap upper half of temporary vector
1474 movdqu %xmm2,16($rptr,$i)
1475 movdqu %xmm1,32($rptr,$i)
1476 lea 32($i),$i # i+=32 bytes
1477 dec $j
1478 jnz .Lsqr4x_copy
1479
1480 movdqu 16($tptr,$i),%xmm2
1481 movdqa %xmm0,80(%rsp,$i) # zap lower half of temporary vector
1482 movdqa %xmm0,16($nptr,$i) # zap upper half of temporary vector
1483 movdqu %xmm2,16($rptr,$i)
1484___
1485}
1486$code.=<<___; # epilogue: reload saved registers, unwind the frame, return 1
1487 mov 56(%rsp),%rsi # restore %rsp
1488 mov \$1,%rax # return value 1
1489 mov 0(%rsi),%r15 # registers are reloaded in reverse push order
1490 mov 8(%rsi),%r14
1491 mov 16(%rsi),%r13
1492 mov 24(%rsi),%r12
1493 mov 32(%rsi),%rbp
1494 mov 40(%rsi),%rbx
1495 lea 48(%rsi),%rsp # step over the six saved registers
1496.Lsqr4x_epilogue:
1497 ret
1498.size bn_sqr4x_mont,.-bn_sqr4x_mont
1499___
1500}}}
1501
1502print $code; # emit the accumulated assembly text on STDOUT
1503close STDOUT; # finish the output stream
diff --git a/src/lib/libcrypto/bn/asm/x86_64-mont5.pl b/src/lib/libcrypto/bn/asm/x86_64-mont5.pl
deleted file mode 100755
index 38751ec5de..0000000000
--- a/src/lib/libcrypto/bn/asm/x86_64-mont5.pl
+++ /dev/null
@@ -1,1192 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# August 2011.
11#
12# Companion to x86_64-mont.pl that optimizes cache-timing attack
13# countermeasures. The subroutines are produced by replacing bp[i]
14# references in their x86_64-mont.pl counterparts with cache-neutral
15# references to powers table computed in BN_mod_exp_mont_consttime.
16# In addition, a subroutine that scatters elements of the powers table
17# is implemented, so that scatter-/gathering can be tuned without
18# bn_exp.c modifications.
19
20$flavour = shift; # first argument: flavour, or the output file name (see the test below)
21$output = shift; # second argument: output file name
22if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } # a first argument containing a dot is taken as the output file
23
24$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); # selects the stack offset of the 7th argument below
25
26$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's own path
27( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
28( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
29die "can't locate x86_64-xlate.pl";
30
31open OUT,"| \"$^X\" $xlate $flavour $output"; # pipe the generated text through x86_64-xlate.pl
32*STDOUT=*OUT; # plain print now writes to that pipe
33
34# int bn_mul_mont_gather5(
35$rp="%rdi"; # BN_ULONG *rp,
36$ap="%rsi"; # const BN_ULONG *ap,
37$bp="%rdx"; # const BN_ULONG *bp,
38$np="%rcx"; # const BN_ULONG *np,
39$n0="%r8"; # const BN_ULONG *n0,
40$num="%r9"; # int num,
41 # int idx); # 0 to 2^5-1, "index" in $bp holding
42 # pre-computed powers of a', interlaced
43 # in such manner that b[0] is $bp[idx],
44 # b[1] is [2^5+idx], etc.
45$lo0="%r10"; # scratch: low product word, later the tp[j] being added
46$hi0="%r11"; # high word carried along the ap[j]*bp[i] chain
47$hi1="%r13"; # high word carried along the np[j]*m1 chain
48$i="%r14"; # outer loop index
49$j="%r15"; # inner loop index
50$m0="%rbx"; # gathered bp[i] word
51$m1="%rbp"; # tp[0]*n0 multiplier for the current pass
52
53$code=<<___; # entry point: dispatch to the 4x variant when possible, else set up the generic frame
54.text
55
56.globl bn_mul_mont_gather5
57.type bn_mul_mont_gather5,\@function,6
58.align 64
59bn_mul_mont_gather5:
60 _CET_ENDBR
61 test \$3,${num}d # num not a multiple of 4: generic path
62 jnz .Lmul_enter
63 cmp \$8,${num}d # num below 8: generic path
64 jb .Lmul_enter
65 jmp .Lmul4x_enter # otherwise take the 4x-unrolled code
66
67.align 16
68.Lmul_enter:
69 mov ${num}d,${num}d # 32-bit move zero-extends num to 64 bits
70 movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
71 lea .Linc(%rip),%r10 # table read into xmm0/xmm1 when the masks are built
72 push %rbx
73 push %rbp
74 push %r12
75 push %r13
76 push %r14
77 push %r15
78
79.Lmul_alloca:
80 mov %rsp,%rax # remember the stack pointer as it is after the pushes
81 lea 2($num),%r11
82 neg %r11 # r11=-(num+2)
83 lea -264(%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)+256+8)
84 and \$-1024,%rsp # minimize TLB usage
85
86 mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
87.Lmul_body:
88 lea 128($bp),%r12 # reassign $bp (+size optimization)
89___
90 $bp="%r12"; # from here on $bp names %r12, loaded above with the original pointer plus 128
91 $STRIDE=2**5*8; # 5 is "window size"
92 $N=$STRIDE/4; # should match cache line size
93$code.=<<___;
94 movdqa 0(%r10),%xmm0 # 00000001000000010000000000000000
95 movdqa 16(%r10),%xmm1 # 00000002000000020000000200000002
96 lea 24-112(%rsp,$num,8),%r10# place the mask after tp[num+3] (+ICache optimization)
97 and \$-16,%r10 # 16-byte alignment required by the movdqa stores below
98
99 pshufd \$0,%xmm5,%xmm5 # broadcast index
100 movdqa %xmm1,%xmm4 # xmm4 keeps the per-step increment
101 movdqa %xmm1,%xmm2
102___
103########################################################################
104# calculate mask by comparing 0..31 to index and save result to stack
105#
106$code.=<<___;
107 paddd %xmm0,%xmm1
108 pcmpeqd %xmm5,%xmm0 # compare to 1,0
109 .byte 0x67
110 movdqa %xmm4,%xmm3
111___
112for($k=0;$k<$STRIDE/16-4;$k+=4) { # mask slots 0..11, four per iteration
113$code.=<<___;
114 paddd %xmm1,%xmm2
115 pcmpeqd %xmm5,%xmm1 # compare to 3,2
116 movdqa %xmm0,`16*($k+0)+112`(%r10)
117 movdqa %xmm4,%xmm0
118
119 paddd %xmm2,%xmm3
120 pcmpeqd %xmm5,%xmm2 # compare to 5,4
121 movdqa %xmm1,`16*($k+1)+112`(%r10)
122 movdqa %xmm4,%xmm1
123
124 paddd %xmm3,%xmm0
125 pcmpeqd %xmm5,%xmm3 # compare to 7,6
126 movdqa %xmm2,`16*($k+2)+112`(%r10)
127 movdqa %xmm4,%xmm2
128
129 paddd %xmm0,%xmm1
130 pcmpeqd %xmm5,%xmm0 # compare to the next index pair
131 movdqa %xmm3,`16*($k+3)+112`(%r10)
132 movdqa %xmm4,%xmm3
133___
134}
135$code.=<<___; # last iteration can be optimized
136 paddd %xmm1,%xmm2
137 pcmpeqd %xmm5,%xmm1
138 movdqa %xmm0,`16*($k+0)+112`(%r10)
139
140 paddd %xmm2,%xmm3
141 .byte 0x67
142 pcmpeqd %xmm5,%xmm2
143 movdqa %xmm1,`16*($k+1)+112`(%r10)
144
145 pcmpeqd %xmm5,%xmm3
146 movdqa %xmm2,`16*($k+2)+112`(%r10)
147 pand `16*($k+0)-128`($bp),%xmm0 # while it's still in register
148
149 pand `16*($k+1)-128`($bp),%xmm1
150 pand `16*($k+2)-128`($bp),%xmm2
151 movdqa %xmm3,`16*($k+3)+112`(%r10)
152 pand `16*($k+3)-128`($bp),%xmm3
153 por %xmm2,%xmm0
154 por %xmm3,%xmm1
155___
156for($k=0;$k<$STRIDE/16-4;$k+=4) { # AND table slots 0..11 with their stored masks and OR them into xmm0/xmm1
157$code.=<<___;
158 movdqa `16*($k+0)-128`($bp),%xmm4
159 movdqa `16*($k+1)-128`($bp),%xmm5
160 movdqa `16*($k+2)-128`($bp),%xmm2
161 pand `16*($k+0)+112`(%r10),%xmm4
162 movdqa `16*($k+3)-128`($bp),%xmm3
163 pand `16*($k+1)+112`(%r10),%xmm5
164 por %xmm4,%xmm0
165 pand `16*($k+2)+112`(%r10),%xmm2
166 por %xmm5,%xmm1
167 pand `16*($k+3)+112`(%r10),%xmm3
168 por %xmm2,%xmm0
169 por %xmm3,%xmm1
170___
171}
172$code.=<<___; # finish gathering bp[0], run the first pass (i=0), then open the outer loop
173 por %xmm1,%xmm0 # merge the two accumulators
174 pshufd \$0x4e,%xmm0,%xmm1 # swap the 64-bit halves
175 por %xmm1,%xmm0 # fold the halves together
176 lea $STRIDE($bp),$bp # step the table pointer by one stride
177 movd %xmm0,$m0 # m0=bp[0]
178
179 mov ($n0),$n0 # pull n0[0] value
180 mov ($ap),%rax
181
182 xor $i,$i # i=0
183 xor $j,$j # j=0
184
185 mov $n0,$m1
186 mulq $m0 # ap[0]*bp[0]
187 mov %rax,$lo0
188 mov ($np),%rax
189
190 imulq $lo0,$m1 # "tp[0]"*n0
191 mov %rdx,$hi0
192
193 mulq $m1 # np[0]*m1
194 add %rax,$lo0 # discarded
195 mov 8($ap),%rax
196 adc \$0,%rdx
197 mov %rdx,$hi1
198
199 lea 1($j),$j # j++
200 jmp .L1st_enter
201
202.align 16
203.L1st:
204 add %rax,$hi1
205 mov ($ap,$j,8),%rax
206 adc \$0,%rdx
207 add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0]
208 mov $lo0,$hi0
209 adc \$0,%rdx
210 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
211 mov %rdx,$hi1
212
213.L1st_enter:
214 mulq $m0 # ap[j]*bp[0]
215 add %rax,$hi0
216 mov ($np,$j,8),%rax
217 adc \$0,%rdx
218 lea 1($j),$j # j++
219 mov %rdx,$lo0
220
221 mulq $m1 # np[j]*m1
222 cmp $num,$j
223 jl .L1st
224
225 add %rax,$hi1
226 mov ($ap),%rax # ap[0]
227 adc \$0,%rdx
228 add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0]
229 adc \$0,%rdx
230 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
231 mov %rdx,$hi1
232 mov $lo0,$hi0
233
234 xor %rdx,%rdx
235 add $hi0,$hi1
236 adc \$0,%rdx
237 mov $hi1,-8(%rsp,$num,8)
238 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
239
240 lea 1($i),$i # i++
241 jmp .Louter
242.align 16
243.Louter:
244 lea 24+128(%rsp,$num,8),%rdx # where 256-byte mask is (+size optimization)
245 and \$-16,%rdx
246 pxor %xmm4,%xmm4 # clear the two gather accumulators
247 pxor %xmm5,%xmm5
248___
249for($k=0;$k<$STRIDE/16;$k+=4) { # gather bp[i]: AND every table slot with its stored mask and OR the results
250$code.=<<___;
251 movdqa `16*($k+0)-128`($bp),%xmm0
252 movdqa `16*($k+1)-128`($bp),%xmm1
253 movdqa `16*($k+2)-128`($bp),%xmm2
254 movdqa `16*($k+3)-128`($bp),%xmm3
255 pand `16*($k+0)-128`(%rdx),%xmm0
256 pand `16*($k+1)-128`(%rdx),%xmm1
257 por %xmm0,%xmm4
258 pand `16*($k+2)-128`(%rdx),%xmm2
259 por %xmm1,%xmm5
260 pand `16*($k+3)-128`(%rdx),%xmm3
261 por %xmm2,%xmm4
262 por %xmm3,%xmm5
263___
264}
265$code.=<<___; # inner multiply-reduce loop, final subtraction, masked copy-out and epilogue
266 por %xmm5,%xmm4 # merge the two accumulators
267 pshufd \$0x4e,%xmm4,%xmm0 # swap the 64-bit halves
268 por %xmm4,%xmm0 # fold the halves together
269 lea $STRIDE($bp),$bp # step the table pointer by one stride
270 movd %xmm0,$m0 # m0=bp[i]
271
272 xor $j,$j # j=0
273 mov $n0,$m1
274 mov (%rsp),$lo0
275
276 mulq $m0 # ap[0]*bp[i]
277 add %rax,$lo0 # ap[0]*bp[i]+tp[0]
278 mov ($np),%rax
279 adc \$0,%rdx
280
281 imulq $lo0,$m1 # tp[0]*n0
282 mov %rdx,$hi0
283
284 mulq $m1 # np[0]*m1
285 add %rax,$lo0 # discarded
286 mov 8($ap),%rax
287 adc \$0,%rdx
288 mov 8(%rsp),$lo0 # tp[1]
289 mov %rdx,$hi1
290
291 lea 1($j),$j # j++
292 jmp .Linner_enter
293
294.align 16
295.Linner:
296 add %rax,$hi1
297 mov ($ap,$j,8),%rax
298 adc \$0,%rdx
299 add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j]
300 mov (%rsp,$j,8),$lo0
301 adc \$0,%rdx
302 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
303 mov %rdx,$hi1
304
305.Linner_enter:
306 mulq $m0 # ap[j]*bp[i]
307 add %rax,$hi0
308 mov ($np,$j,8),%rax
309 adc \$0,%rdx
310 add $hi0,$lo0 # ap[j]*bp[i]+tp[j]
311 mov %rdx,$hi0
312 adc \$0,$hi0
313 lea 1($j),$j # j++
314
315 mulq $m1 # np[j]*m1
316 cmp $num,$j
317 jl .Linner
318
319 add %rax,$hi1
320 mov ($ap),%rax # ap[0]
321 adc \$0,%rdx
322 add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j]
323 mov (%rsp,$j,8),$lo0
324 adc \$0,%rdx
325 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
326 mov %rdx,$hi1
327
328 xor %rdx,%rdx
329 add $hi0,$hi1
330 adc \$0,%rdx
331 add $lo0,$hi1 # pull upmost overflow bit
332 adc \$0,%rdx
333 mov $hi1,-8(%rsp,$num,8)
334 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
335
336 lea 1($i),$i # i++
337 cmp $num,$i
338 jl .Louter
339
340 xor $i,$i # i=0 and clear CF!
341 mov (%rsp),%rax # tp[0]
342 lea (%rsp),$ap # borrow ap for tp
343 mov $num,$j # j=num
344 jmp .Lsub
345.align 16
346.Lsub: sbb ($np,$i,8),%rax
347 mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
348 mov 8($ap,$i,8),%rax # tp[i+1]
349 lea 1($i),$i # i++
350 dec $j # doesn't affect CF!
351 jnz .Lsub
352
353 sbb \$0,%rax # handle upmost overflow bit
354 xor $i,$i
355 and %rax,$ap # ap=tp&mask
356 not %rax
357 mov $rp,$np
358 and %rax,$np # np=rp&~mask
359 mov $num,$j # j=num
360 or $np,$ap # ap=borrow?tp:rp
361.align 16
362.Lcopy: # copy or in-place refresh
363 mov ($ap,$i,8),%rax
364 mov $i,(%rsp,$i,8) # zap temporary vector (overwritten with the index, not with zero)
365 mov %rax,($rp,$i,8) # rp[i]=tp[i]
366 lea 1($i),$i
367 sub \$1,$j
368 jnz .Lcopy
369
370 mov 8(%rsp,$num,8),%rsi # restore %rsp
371 mov \$1,%rax # return value 1
372
373 mov (%rsi),%r15
374 mov 8(%rsi),%r14
375 mov 16(%rsi),%r13
376 mov 24(%rsi),%r12
377 mov 32(%rsi),%rbp
378 mov 40(%rsi),%rbx
379 lea 48(%rsi),%rsp # step over the six saved registers
380.Lmul_epilogue:
381 ret
382.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
383___
384{{{
# bn_mul4x_mont_gather5: multiply loop unrolled to four words per pass.
# Each multiplier word is gathered by reading a whole table row and masking
# it, so the load addresses do not depend on the index.
385my @A=("%r10","%r11"); # carry/accumulator pair for the ap[j]*bp[i] products
386my @N=("%r13","%rdi"); # carry/accumulator pair for the np[j]*m1 products
387$code.=<<___;
388.type bn_mul4x_mont_gather5,\@function,6
389.align 16
390bn_mul4x_mont_gather5:
391 _CET_ENDBR
392.Lmul4x_enter:
393 mov ${num}d,${num}d # zero-extend num to 64 bits
394 movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
395 lea .Linc(%rip),%r10 # constants used to build the masks
396 push %rbx # the six registers pushed here are reloaded in the epilogue
397 push %rbp
398 push %r12
399 push %r13
400 push %r14
401 push %r15
402
403.Lmul4x_alloca:
404 mov %rsp,%rax # remember %rsp as it is after the pushes
405 lea 4($num),%r11
406 neg %r11 # r11=-(num+4)
407 lea -256(%rsp,%r11,8),%rsp # tp=alloca(8*(num+4)+256)
408 and \$-1024,%rsp # minimize TLB usage
409
410 mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
411.Lmul4x_body:
412 mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
413 lea 128(%rdx),%r12 # reassign $bp (+size optimization)
414___
415 $bp="%r12";
416 $STRIDE=2**5*8; # 5 is "window size"
417 $N=$STRIDE/4; # should match cache line size
418$code.=<<___;
419 movdqa 0(%r10),%xmm0 # 00000001000000010000000000000000
420 movdqa 16(%r10),%xmm1 # 00000002000000020000000200000002
421 lea 32-112(%rsp,$num,8),%r10# place the mask after tp[num+4] (+ICache optimization)
422
423 pshufd \$0,%xmm5,%xmm5 # broadcast index
424 movdqa %xmm1,%xmm4 # keep a copy of the 2,2,2,2 increment
425 .byte 0x67,0x67 # 0x67 prefix bytes; presumably padding, confirm before touching
426 movdqa %xmm1,%xmm2
427___
428########################################################################
429# calculate mask by comparing 0..31 to index and save result to stack
430#
# Every 16-byte mask covers two consecutive indices (see the "compare to"
# notes below); the masks are stored at offset +112 from %r10.
431$code.=<<___;
432 paddd %xmm0,%xmm1
433 pcmpeqd %xmm5,%xmm0 # compare to 1,0
434 .byte 0x67 # 0x67 prefix byte; presumably padding, confirm before touching
435 movdqa %xmm4,%xmm3
436___
# Each pass of this loop emits four masks; the final four are emitted by the
# separate heredoc that follows.
437for($k=0;$k<$STRIDE/16-4;$k+=4) {
438$code.=<<___;
439 paddd %xmm1,%xmm2
440 pcmpeqd %xmm5,%xmm1 # compare to 3,2
441 movdqa %xmm0,`16*($k+0)+112`(%r10)
442 movdqa %xmm4,%xmm0
443
444 paddd %xmm2,%xmm3
445 pcmpeqd %xmm5,%xmm2 # compare to 5,4
446 movdqa %xmm1,`16*($k+1)+112`(%r10)
447 movdqa %xmm4,%xmm1
448
449 paddd %xmm3,%xmm0
450 pcmpeqd %xmm5,%xmm3 # compare to 7,6
451 movdqa %xmm2,`16*($k+2)+112`(%r10)
452 movdqa %xmm4,%xmm2
453
454 paddd %xmm0,%xmm1
455 pcmpeqd %xmm5,%xmm0 # compare to the next pair of indices
456 movdqa %xmm3,`16*($k+3)+112`(%r10)
457 movdqa %xmm4,%xmm3
458___
459}
460$code.=<<___; # last iteration can be optimized
461 paddd %xmm1,%xmm2
462 pcmpeqd %xmm5,%xmm1
463 movdqa %xmm0,`16*($k+0)+112`(%r10)
464
465 paddd %xmm2,%xmm3
466 .byte 0x67 # 0x67 prefix byte; presumably padding, confirm before touching
467 pcmpeqd %xmm5,%xmm2
468 movdqa %xmm1,`16*($k+1)+112`(%r10)
469
470 pcmpeqd %xmm5,%xmm3
471 movdqa %xmm2,`16*($k+2)+112`(%r10)
472 pand `16*($k+0)-128`($bp),%xmm0 # while it's still in register
473
474 pand `16*($k+1)-128`($bp),%xmm1
475 pand `16*($k+2)-128`($bp),%xmm2
476 movdqa %xmm3,`16*($k+3)+112`(%r10)
477 pand `16*($k+3)-128`($bp),%xmm3
478 por %xmm2,%xmm0 # start accumulating the gathered word
479 por %xmm3,%xmm1
480___
# Finish gathering bp[0]: AND the remaining table chunks with their stored
# masks and OR everything into %xmm0/%xmm1.
481for($k=0;$k<$STRIDE/16-4;$k+=4) {
482$code.=<<___;
483 movdqa `16*($k+0)-128`($bp),%xmm4
484 movdqa `16*($k+1)-128`($bp),%xmm5
485 movdqa `16*($k+2)-128`($bp),%xmm2
486 pand `16*($k+0)+112`(%r10),%xmm4
487 movdqa `16*($k+3)-128`($bp),%xmm3
488 pand `16*($k+1)+112`(%r10),%xmm5
489 por %xmm4,%xmm0
490 pand `16*($k+2)+112`(%r10),%xmm2
491 por %xmm5,%xmm1
492 pand `16*($k+3)+112`(%r10),%xmm3
493 por %xmm2,%xmm0
494 por %xmm3,%xmm1
495___
496}
497$code.=<<___;
498 por %xmm1,%xmm0
499 pshufd \$0x4e,%xmm0,%xmm1 # swap the two 64-bit halves
500 por %xmm1,%xmm0 # fold both halves into the low quadword
501 lea $STRIDE($bp),$bp
502 movd %xmm0,$m0 # m0=bp[0]
503
504 mov ($n0),$n0 # pull n0[0] value
505 mov ($ap),%rax
506
507 xor $i,$i # i=0
508 xor $j,$j # j=0
509
510 mov $n0,$m1
511 mulq $m0 # ap[0]*bp[0]
512 mov %rax,$A[0]
513 mov ($np),%rax
514
515 imulq $A[0],$m1 # "tp[0]"*n0
516 mov %rdx,$A[1]
517
518 mulq $m1 # np[0]*m1
519 add %rax,$A[0] # discarded
520 mov 8($ap),%rax
521 adc \$0,%rdx
522 mov %rdx,$N[1]
523
524 mulq $m0 # ap[1]*bp[0]
525 add %rax,$A[1]
526 mov 8($np),%rax
527 adc \$0,%rdx
528 mov %rdx,$A[0]
529
530 mulq $m1 # np[1]*m1
531 add %rax,$N[1]
532 mov 16($ap),%rax
533 adc \$0,%rdx
534 add $A[1],$N[1]
535 lea 4($j),$j # j+=4
536 adc \$0,%rdx
537 mov $N[1],(%rsp) # tp[0]
538 mov %rdx,$N[0]
539 jmp .L1st4x
540.align 16
541.L1st4x:
542 mulq $m0 # ap[j]*bp[0]
543 add %rax,$A[0]
544 mov -16($np,$j,8),%rax
545 adc \$0,%rdx
546 mov %rdx,$A[1]
547
548 mulq $m1 # np[j]*m1
549 add %rax,$N[0]
550 mov -8($ap,$j,8),%rax
551 adc \$0,%rdx
552 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
553 adc \$0,%rdx
554 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
555 mov %rdx,$N[1]
556
557 mulq $m0 # ap[j]*bp[0]
558 add %rax,$A[1]
559 mov -8($np,$j,8),%rax
560 adc \$0,%rdx
561 mov %rdx,$A[0]
562
563 mulq $m1 # np[j]*m1
564 add %rax,$N[1]
565 mov ($ap,$j,8),%rax
566 adc \$0,%rdx
567 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
568 adc \$0,%rdx
569 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
570 mov %rdx,$N[0]
571
572 mulq $m0 # ap[j]*bp[0]
573 add %rax,$A[0]
574 mov ($np,$j,8),%rax
575 adc \$0,%rdx
576 mov %rdx,$A[1]
577
578 mulq $m1 # np[j]*m1
579 add %rax,$N[0]
580 mov 8($ap,$j,8),%rax
581 adc \$0,%rdx
582 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
583 adc \$0,%rdx
584 mov $N[0],-8(%rsp,$j,8) # tp[j-1]
585 mov %rdx,$N[1]
586
587 mulq $m0 # ap[j]*bp[0]
588 add %rax,$A[1]
589 mov 8($np,$j,8),%rax
590 adc \$0,%rdx
591 lea 4($j),$j # j+=4
592 mov %rdx,$A[0]
593
594 mulq $m1 # np[j]*m1
595 add %rax,$N[1]
596 mov -16($ap,$j,8),%rax
597 adc \$0,%rdx
598 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
599 adc \$0,%rdx
600 mov $N[1],-32(%rsp,$j,8) # tp[j-1]
601 mov %rdx,$N[0]
602 cmp $num,$j
603 jl .L1st4x
604
605 mulq $m0 # ap[j]*bp[0]
606 add %rax,$A[0]
607 mov -16($np,$j,8),%rax
608 adc \$0,%rdx
609 mov %rdx,$A[1]
610
611 mulq $m1 # np[j]*m1
612 add %rax,$N[0]
613 mov -8($ap,$j,8),%rax
614 adc \$0,%rdx
615 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
616 adc \$0,%rdx
617 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
618 mov %rdx,$N[1]
619
620 mulq $m0 # ap[j]*bp[0]
621 add %rax,$A[1]
622 mov -8($np,$j,8),%rax
623 adc \$0,%rdx
624 mov %rdx,$A[0]
625
626 mulq $m1 # np[j]*m1
627 add %rax,$N[1]
628 mov ($ap),%rax # ap[0]
629 adc \$0,%rdx
630 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
631 adc \$0,%rdx
632 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
633 mov %rdx,$N[0]
634
635 xor $N[1],$N[1]
636 add $A[0],$N[0]
637 adc \$0,$N[1]
638 mov $N[0],-8(%rsp,$j,8)
639 mov $N[1],(%rsp,$j,8) # store upmost overflow bit
640
641 lea 1($i),$i # i++
642.align 4
643.Louter4x:
644 lea 32+128(%rsp,$num,8),%rdx # where 256-byte mask is (+size optimization)
645 pxor %xmm4,%xmm4
646 pxor %xmm5,%xmm5
647___
# Gather bp[i]: AND every 16-byte chunk of the current table row with its
# stored mask and OR the results into %xmm4/%xmm5.
648for($k=0;$k<$STRIDE/16;$k+=4) {
649$code.=<<___;
650 movdqa `16*($k+0)-128`($bp),%xmm0
651 movdqa `16*($k+1)-128`($bp),%xmm1
652 movdqa `16*($k+2)-128`($bp),%xmm2
653 movdqa `16*($k+3)-128`($bp),%xmm3
654 pand `16*($k+0)-128`(%rdx),%xmm0
655 pand `16*($k+1)-128`(%rdx),%xmm1
656 por %xmm0,%xmm4
657 pand `16*($k+2)-128`(%rdx),%xmm2
658 por %xmm1,%xmm5
659 pand `16*($k+3)-128`(%rdx),%xmm3
660 por %xmm2,%xmm4
661 por %xmm3,%xmm5
662___
663}
664$code.=<<___;
665 por %xmm5,%xmm4
666 pshufd \$0x4e,%xmm4,%xmm0 # swap the two 64-bit halves
667 por %xmm4,%xmm0 # fold both halves into the low quadword
668 lea $STRIDE($bp),$bp
669 movd %xmm0,$m0 # m0=bp[i]
670
671 xor $j,$j # j=0
672
673 mov (%rsp),$A[0]
674 mov $n0,$m1
675 mulq $m0 # ap[0]*bp[i]
676 add %rax,$A[0] # ap[0]*bp[i]+tp[0]
677 mov ($np),%rax
678 adc \$0,%rdx
679
680 imulq $A[0],$m1 # tp[0]*n0
681 mov %rdx,$A[1]
682
683 mulq $m1 # np[0]*m1
684 add %rax,$A[0] # "$N[0]", discarded
685 mov 8($ap),%rax
686 adc \$0,%rdx
687 mov %rdx,$N[1]
688
689 mulq $m0 # ap[j]*bp[i]
690 add %rax,$A[1]
691 mov 8($np),%rax
692 adc \$0,%rdx
693 add 8(%rsp),$A[1] # +tp[1]
694 adc \$0,%rdx
695 mov %rdx,$A[0]
696
697 mulq $m1 # np[j]*m1
698 add %rax,$N[1]
699 mov 16($ap),%rax
700 adc \$0,%rdx
701 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j]
702 lea 4($j),$j # j+=4
703 adc \$0,%rdx
704 mov %rdx,$N[0]
705 jmp .Linner4x
706.align 16
707.Linner4x:
708 mulq $m0 # ap[j]*bp[i]
709 add %rax,$A[0]
710 mov -16($np,$j,8),%rax
711 adc \$0,%rdx
712 add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
713 adc \$0,%rdx
714 mov %rdx,$A[1]
715
716 mulq $m1 # np[j]*m1
717 add %rax,$N[0]
718 mov -8($ap,$j,8),%rax
719 adc \$0,%rdx
720 add $A[0],$N[0]
721 adc \$0,%rdx
722 mov $N[1],-32(%rsp,$j,8) # tp[j-1]
723 mov %rdx,$N[1]
724
725 mulq $m0 # ap[j]*bp[i]
726 add %rax,$A[1]
727 mov -8($np,$j,8),%rax
728 adc \$0,%rdx
729 add -8(%rsp,$j,8),$A[1]
730 adc \$0,%rdx
731 mov %rdx,$A[0]
732
733 mulq $m1 # np[j]*m1
734 add %rax,$N[1]
735 mov ($ap,$j,8),%rax
736 adc \$0,%rdx
737 add $A[1],$N[1]
738 adc \$0,%rdx
739 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
740 mov %rdx,$N[0]
741
742 mulq $m0 # ap[j]*bp[i]
743 add %rax,$A[0]
744 mov ($np,$j,8),%rax
745 adc \$0,%rdx
746 add (%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
747 adc \$0,%rdx
748 mov %rdx,$A[1]
749
750 mulq $m1 # np[j]*m1
751 add %rax,$N[0]
752 mov 8($ap,$j,8),%rax
753 adc \$0,%rdx
754 add $A[0],$N[0]
755 adc \$0,%rdx
756 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
757 mov %rdx,$N[1]
758
759 mulq $m0 # ap[j]*bp[i]
760 add %rax,$A[1]
761 mov 8($np,$j,8),%rax
762 adc \$0,%rdx
763 add 8(%rsp,$j,8),$A[1]
764 adc \$0,%rdx
765 lea 4($j),$j # j+=4
766 mov %rdx,$A[0]
767
768 mulq $m1 # np[j]*m1
769 add %rax,$N[1]
770 mov -16($ap,$j,8),%rax
771 adc \$0,%rdx
772 add $A[1],$N[1]
773 adc \$0,%rdx
774 mov $N[0],-40(%rsp,$j,8) # tp[j-1]
775 mov %rdx,$N[0]
776 cmp $num,$j
777 jl .Linner4x
778
779 mulq $m0 # ap[j]*bp[i]
780 add %rax,$A[0]
781 mov -16($np,$j,8),%rax
782 adc \$0,%rdx
783 add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
784 adc \$0,%rdx
785 mov %rdx,$A[1]
786
787 mulq $m1 # np[j]*m1
788 add %rax,$N[0]
789 mov -8($ap,$j,8),%rax
790 adc \$0,%rdx
791 add $A[0],$N[0]
792 adc \$0,%rdx
793 mov $N[1],-32(%rsp,$j,8) # tp[j-1]
794 mov %rdx,$N[1]
795
796 mulq $m0 # ap[j]*bp[i]
797 add %rax,$A[1]
798 mov -8($np,$j,8),%rax
799 adc \$0,%rdx
800 add -8(%rsp,$j,8),$A[1]
801 adc \$0,%rdx
802 lea 1($i),$i # i++
803 mov %rdx,$A[0]
804
805 mulq $m1 # np[j]*m1
806 add %rax,$N[1]
807 mov ($ap),%rax # ap[0]
808 adc \$0,%rdx
809 add $A[1],$N[1]
810 adc \$0,%rdx
811 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
812 mov %rdx,$N[0]
813
814 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
815
816 xor $N[1],$N[1]
817 add $A[0],$N[0]
818 adc \$0,$N[1]
819 add (%rsp,$num,8),$N[0] # pull upmost overflow bit
820 adc \$0,$N[1]
821 mov $N[0],-8(%rsp,$j,8)
822 mov $N[1],(%rsp,$j,8) # store upmost overflow bit
823
824 cmp $num,$i
825 jl .Louter4x
826___
827{
# Final step: write tp-np to rp four words at a time, then use the resulting
# borrow to pick either tp or rp as the source of the copy into rp; the
# temporary vector is overwritten with zeros during the copy.
828my @ri=("%rax","%rdx",$m0,$m1);
829$code.=<<___;
830 mov 16(%rsp,$num,8),$rp # restore $rp
831 mov 0(%rsp),@ri[0] # tp[0]
832 pxor %xmm0,%xmm0 # zero, used below to wipe the temporary vector
833 mov 8(%rsp),@ri[1] # tp[1]
834 shr \$2,$num # num/=4
835 lea (%rsp),$ap # borrow ap for tp
836 xor $i,$i # i=0 and clear CF!
837
838 sub 0($np),@ri[0]
839 mov 16($ap),@ri[2] # tp[2]
840 mov 24($ap),@ri[3] # tp[3]
841 sbb 8($np),@ri[1]
842 lea -1($num),$j # j=num/4-1
843 jmp .Lsub4x
844.align 16
845.Lsub4x:
846 mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i]
847 mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i]
848 sbb 16($np,$i,8),@ri[2]
849 mov 32($ap,$i,8),@ri[0] # tp[i+4]
850 mov 40($ap,$i,8),@ri[1]
851 sbb 24($np,$i,8),@ri[3]
852 mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i]
853 mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i]
854 sbb 32($np,$i,8),@ri[0]
855 mov 48($ap,$i,8),@ri[2]
856 mov 56($ap,$i,8),@ri[3]
857 sbb 40($np,$i,8),@ri[1]
858 lea 4($i),$i # i+=4
859 dec $j # doesn't affect CF!
860 jnz .Lsub4x
861
862 mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i]
863 mov 32($ap,$i,8),@ri[0] # load overflow bit
864 sbb 16($np,$i,8),@ri[2]
865 mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i]
866 sbb 24($np,$i,8),@ri[3]
867 mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i]
868
869 sbb \$0,@ri[0] # handle upmost overflow bit
870 mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i]
871 xor $i,$i # i=0
872 and @ri[0],$ap
873 not @ri[0]
874 mov $rp,$np
875 and @ri[0],$np
876 lea -1($num),$j
877 or $np,$ap # ap=borrow?tp:rp
878
879 movdqu ($ap),%xmm1
880 movdqa %xmm0,(%rsp)
881 movdqu %xmm1,($rp)
882 jmp .Lcopy4x
883.align 16
884.Lcopy4x: # copy or in-place refresh
885 movdqu 16($ap,$i),%xmm2
886 movdqu 32($ap,$i),%xmm1
887 movdqa %xmm0,16(%rsp,$i) # zap temporary vector
888 movdqu %xmm2,16($rp,$i)
889 movdqa %xmm0,32(%rsp,$i) # zap temporary vector
890 movdqu %xmm1,32($rp,$i)
891 lea 32($i),$i # i is a byte offset in this loop
892 dec $j
893 jnz .Lcopy4x
894
895 shl \$2,$num # undo the earlier num/=4
896 movdqu 16($ap,$i),%xmm2
897 movdqa %xmm0,16(%rsp,$i)
898 movdqu %xmm2,16($rp,$i)
899___
900}
# Epilogue: reload the stack pointer saved at tp[num+1], restore the six
# pushed registers from it and return 1.
901$code.=<<___;
902 mov 8(%rsp,$num,8),%rsi # saved stack pointer, stored at tp[num+1] by the prologue
903 mov \$1,%rax # return value
904
905 mov (%rsi),%r15
906 mov 8(%rsi),%r14
907 mov 16(%rsi),%r13
908 mov 24(%rsi),%r12
909 mov 32(%rsi),%rbp
910 mov 40(%rsi),%rbx
911 lea 48(%rsi),%rsp # step over the six saved registers
912.Lmul4x_epilogue:
913 ret
914.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
915___
916}}}
917
918{
# bn_scatter5 stores num words from inp into column idx of the table, one
# word per 32-entry row.  bn_gather5 reads column idx back into out by
# loading every row in full and masking it with precomputed compare masks.
919my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%rdx","%r8", "%r9d") : # Win64 order
920 ("%rdi","%rsi","%rdx","%ecx"); # Unix order
921my $out=$inp;
922my $STRIDE=2**5*8;
923my $N=$STRIDE/4;
924
925$code.=<<___;
926.globl bn_scatter5
927.type bn_scatter5,\@abi-omnipotent
928.align 16
929bn_scatter5:
930 _CET_ENDBR
931 cmp \$0, $num
932 jz .Lscatter_epilogue # nothing to do when num is zero
933 lea ($tbl,$idx,8),$tbl # point at column idx of the first row
934.Lscatter:
935 mov ($inp),%rax
936 lea 8($inp),$inp
937 mov %rax,($tbl)
938 lea 32*8($tbl),$tbl # advance one 32-entry row
939 sub \$1,$num
940 jnz .Lscatter
941.Lscatter_epilogue:
942 ret
943.size bn_scatter5,.-bn_scatter5
944
945.globl bn_gather5
946.type bn_gather5,\@abi-omnipotent
947.align 16
948bn_gather5:
949 _CET_ENDBR
950.LSEH_begin_bn_gather5: # Win64 thing, but harmless in other cases
951 # I can't trust assembler to use specific encoding:-(
952 .byte 0x4c,0x8d,0x14,0x24 # lea (%rsp),%r10
953 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 # sub \$0x108,%rsp
954 lea .Linc(%rip),%rax
955 and \$-16,%rsp # shouldn't be formally required
956
957 movd $idx,%xmm5
958 movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000
959 movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002
960 lea 128($tbl),%r11 # size optimization
961 lea 128(%rsp),%rax # size optimization
962
963 pshufd \$0,%xmm5,%xmm5 # broadcast $idx
964 movdqa %xmm1,%xmm4
965 movdqa %xmm1,%xmm2
966___
967########################################################################
968# calculate mask by comparing 0..31 to $idx and save result to stack
969#
970for($i=0;$i<$STRIDE/16;$i+=4) {
971$code.=<<___;
972 paddd %xmm0,%xmm1
973 pcmpeqd %xmm5,%xmm0 # compare to 1,0
974___
# The fourth mask of the previous pass is stored one pass late; the store
# for the very last pass is emitted after the loop.
975$code.=<<___ if ($i);
976 movdqa %xmm3,`16*($i-1)-128`(%rax)
977___
978$code.=<<___;
979 movdqa %xmm4,%xmm3
980
981 paddd %xmm1,%xmm2
982 pcmpeqd %xmm5,%xmm1 # compare to 3,2
983 movdqa %xmm0,`16*($i+0)-128`(%rax)
984 movdqa %xmm4,%xmm0
985
986 paddd %xmm2,%xmm3
987 pcmpeqd %xmm5,%xmm2 # compare to 5,4
988 movdqa %xmm1,`16*($i+1)-128`(%rax)
989 movdqa %xmm4,%xmm1
990
991 paddd %xmm3,%xmm0
992 pcmpeqd %xmm5,%xmm3 # compare to 7,6
993 movdqa %xmm2,`16*($i+2)-128`(%rax)
994 movdqa %xmm4,%xmm2
995___
996}
997$code.=<<___;
998 movdqa %xmm3,`16*($i-1)-128`(%rax)
999 jmp .Lgather
1000
1001.align 32
1002.Lgather:
1003 pxor %xmm4,%xmm4
1004 pxor %xmm5,%xmm5
1005___
1006for($i=0;$i<$STRIDE/16;$i+=4) {
1007$code.=<<___;
1008 movdqa `16*($i+0)-128`(%r11),%xmm0
1009 movdqa `16*($i+1)-128`(%r11),%xmm1
1010 movdqa `16*($i+2)-128`(%r11),%xmm2
1011 pand `16*($i+0)-128`(%rax),%xmm0
1012 movdqa `16*($i+3)-128`(%r11),%xmm3
1013 pand `16*($i+1)-128`(%rax),%xmm1
1014 por %xmm0,%xmm4
1015 pand `16*($i+2)-128`(%rax),%xmm2
1016 por %xmm1,%xmm5
1017 pand `16*($i+3)-128`(%rax),%xmm3
1018 por %xmm2,%xmm4
1019 por %xmm3,%xmm5
1020___
1021}
1022$code.=<<___;
1023 por %xmm5,%xmm4
1024 lea $STRIDE(%r11),%r11
1025 pshufd \$0x4e,%xmm4,%xmm0 # swap the two 64-bit halves
1026 por %xmm4,%xmm0 # fold both halves into the low quadword
1027 movq %xmm0,($out) # store the selected word to the output
1028 lea 8($out),$out
1029 sub \$1,$num
1030 jnz .Lgather
1031
1032 lea (%r10),%rsp # restore the stack pointer saved in %r10 on entry
1033 ret
1034.LSEH_end_bn_gather5:
1035.size bn_gather5,.-bn_gather5
1036___
1037}
# .Linc: constants for building the select masks.  The first 16 bytes are the
# starting index pair (0 and 1, each stored twice); the second 16 bytes are
# the value 2 in every lane, added with paddd to step to the next pair.
1038$code.=<<___;
1039.section .rodata
1040.align 64
1041.Linc:
1042 .long 0,0, 1,1
1043 .long 2,2, 2,2
1044.text
1045___
1046
1047# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
1048# CONTEXT *context,DISPATCHER_CONTEXT *disp)
#
# Win64-only unwind support.  The handler compares context->Rip with the three
# labels listed in HandlerData[] to decide how much of the frame exists, fixes
# up Rsp and the saved registers in the CONTEXT, copies the CONTEXT to
# disp->ContextRecord, calls RtlVirtualUnwind and returns
# ExceptionContinueSearch.
1049if ($win64) {
1050$rec="%rcx";
1051$frame="%rdx";
1052$context="%r8";
1053$disp="%r9";
1054
1055$code.=<<___;
1056.extern __imp_RtlVirtualUnwind
1057.type mul_handler,\@abi-omnipotent
1058.align 16
1059mul_handler:
1060 _CET_ENDBR
1061 push %rsi
1062 push %rdi
1063 push %rbx
1064 push %rbp
1065 push %r12
1066 push %r13
1067 push %r14
1068 push %r15
1069 pushfq
1070 sub \$64,%rsp
1071
1072 mov 120($context),%rax # pull context->Rax
1073 mov 248($context),%rbx # pull context->Rip
1074
1075 mov 8($disp),%rsi # disp->ImageBase
1076 mov 56($disp),%r11 # disp->HandlerData
1077
1078 mov 0(%r11),%r10d # HandlerData[0]
1079 lea (%rsi,%r10),%r10 # end of prologue label
1080 cmp %r10,%rbx # context->Rip<end of prologue label
1081 jb .Lcommon_seh_tail
1082
1083 lea 48(%rax),%rax
1084
1085 mov 4(%r11),%r10d # HandlerData[1]
1086 lea (%rsi,%r10),%r10 # end of alloca label
1087 cmp %r10,%rbx # context->Rip<end of alloca label
1088 jb .Lcommon_seh_tail
1089
1090 mov 152($context),%rax # pull context->Rsp
1091
1092 mov 8(%r11),%r10d # HandlerData[2]
1093 lea (%rsi,%r10),%r10 # epilogue label
1094 cmp %r10,%rbx # context->Rip>=epilogue label
1095 jae .Lcommon_seh_tail
1096
1097 mov 192($context),%r10 # pull $num
1098 mov 8(%rax,%r10,8),%rax # pull saved stack pointer
1099
1100 lea 48(%rax),%rax
1101
1102 mov -8(%rax),%rbx
1103 mov -16(%rax),%rbp
1104 mov -24(%rax),%r12
1105 mov -32(%rax),%r13
1106 mov -40(%rax),%r14
1107 mov -48(%rax),%r15
1108 mov %rbx,144($context) # restore context->Rbx
1109 mov %rbp,160($context) # restore context->Rbp
1110 mov %r12,216($context) # restore context->R12
1111 mov %r13,224($context) # restore context->R13
1112 mov %r14,232($context) # restore context->R14
1113 mov %r15,240($context) # restore context->R15
1114
1115.Lcommon_seh_tail:
1116 mov 8(%rax),%rdi
1117 mov 16(%rax),%rsi
1118 mov %rax,152($context) # restore context->Rsp
1119 mov %rsi,168($context) # restore context->Rsi
1120 mov %rdi,176($context) # restore context->Rdi
1121
1122 mov 40($disp),%rdi # disp->ContextRecord
1123 mov $context,%rsi # context
1124 mov \$154,%ecx # sizeof(CONTEXT) in quadwords, the count for rep movsq
1125 .long 0xa548f3fc # cld; rep movsq
1126
1127 mov $disp,%rsi
1128 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
1129 mov 8(%rsi),%rdx # arg2, disp->ImageBase
1130 mov 0(%rsi),%r8 # arg3, disp->ControlPc
1131 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
1132 mov 40(%rsi),%r10 # disp->ContextRecord
1133 lea 56(%rsi),%r11 # &disp->HandlerData
1134 lea 24(%rsi),%r12 # &disp->EstablisherFrame
1135 mov %r10,32(%rsp) # arg5
1136 mov %r11,40(%rsp) # arg6
1137 mov %r12,48(%rsp) # arg7
1138 mov %rcx,56(%rsp) # arg8, (NULL)
1139 call *__imp_RtlVirtualUnwind(%rip)
1140
1141 mov \$1,%eax # ExceptionContinueSearch
1142 add \$64,%rsp
1143 popfq
1144 pop %r15
1145 pop %r14
1146 pop %r13
1147 pop %r12
1148 pop %rbp
1149 pop %rbx
1150 pop %rdi
1151 pop %rsi
1152 ret
1153.size mul_handler,.-mul_handler
1154
1155.section .pdata
1156.align 4
1157 .rva .LSEH_begin_bn_mul_mont_gather5
1158 .rva .LSEH_end_bn_mul_mont_gather5
1159 .rva .LSEH_info_bn_mul_mont_gather5
1160
1161 .rva .LSEH_begin_bn_mul4x_mont_gather5
1162 .rva .LSEH_end_bn_mul4x_mont_gather5
1163 .rva .LSEH_info_bn_mul4x_mont_gather5
1164
1165 .rva .LSEH_begin_bn_gather5
1166 .rva .LSEH_end_bn_gather5
1167 .rva .LSEH_info_bn_gather5
1168
1169.section .xdata
1170.align 8
1171.LSEH_info_bn_mul_mont_gather5:
1172 .byte 9,0,0,0
1173 .rva mul_handler
1174 .rva .Lmul_alloca,.Lmul_body,.Lmul_epilogue # HandlerData[]
1175.align 8
1176.LSEH_info_bn_mul4x_mont_gather5:
1177 .byte 9,0,0,0
1178 .rva mul_handler
1179 .rva .Lmul4x_alloca,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[]
1180.align 8
1181.LSEH_info_bn_gather5:
1182 .byte 0x01,0x0b,0x03,0x0a
1183 .byte 0x0b,0x01,0x21,0x00 # sub rsp,0x108
1184 .byte 0x04,0xa3,0x00,0x00 # lea r10,(rsp), set_frame r10
1185.align 8
1186___
1187}
1188
# Replace every backquoted expression in the generated text with the result
# of evaluating it as Perl, then write the finished assembly to STDOUT.
1189$code =~ s/\`([^\`]*)\`/eval($1)/gem;
1190
1191print $code;
1192close STDOUT;
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h
deleted file mode 100644
index 7c3c0b142f..0000000000
--- a/src/lib/libcrypto/bn/bn.h
+++ /dev/null
@@ -1,520 +0,0 @@
1/* $OpenBSD: bn.h,v 1.80 2025/03/09 15:22:40 tb Exp $ */
2/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111/* ====================================================================
112 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
113 *
114 * Portions of the attached software ("Contribution") are developed by
115 * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project.
116 *
117 * The Contribution is licensed pursuant to the Eric Young open source
118 * license provided above.
119 *
120 * The binary polynomial arithmetic software is originally written by
121 * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories.
122 *
123 */
124
125#ifndef HEADER_BN_H
126#define HEADER_BN_H
127
128#include <stdio.h>
129#include <stdlib.h>
130
131#include <openssl/opensslconf.h>
132
133#include <openssl/ossl_typ.h>
134#include <openssl/crypto.h>
135#include <openssl/bio.h>
136
137#ifdef __cplusplus
138extern "C" {
139#endif
140
141/* This next option uses the C library's (2 word)/(1 word) function.
142 * If it is not defined, I use my C version (which is slower).
143 * The reason for this flag is that when the particular C compiler
144 * library routine is used, and the library is linked with a different
145 * compiler, the library is missing. This mostly happens when the
146 * library is built with gcc and then linked using normal cc. This would
147 * be a common occurrence because gcc normally produces code that is
148 * 2 times faster than system compilers for the big number stuff.
149 * For machines with only one compiler (or shared libraries), this should
150 * be on. Again this is only really a problem on machines
151 * using "long long's", are 32bit, and are not using my assembler code. */
152/* #define BN_DIV2W */
153
/* Word-size configuration: BN_ULONG is the bignum word type, BN_BYTES and
 * BN_BITS2 its size in bytes and bits, BN_BITS4 half of BN_BITS2. */
154#ifdef _LP64
/* _LP64: the word is an unsigned long of 8 bytes / 64 bits; BN_LLONG is
 * explicitly undefined. */
155#undef BN_LLONG
156#define BN_ULONG unsigned long
157#define BN_LONG long
158#define BN_BITS 128
159#define BN_BYTES 8
160#define BN_BITS2 64
161#define BN_BITS4 32
162#define BN_MASK2 (0xffffffffffffffffL)
163#define BN_MASK2l (0xffffffffL)
164#define BN_MASK2h (0xffffffff00000000L)
165#define BN_MASK2h1 (0xffffffff80000000L)
166#define BN_TBIT (0x8000000000000000L)
167#define BN_DEC_CONV (10000000000000000000UL)
168#define BN_DEC_FMT1 "%lu"
169#define BN_DEC_FMT2 "%019lu"
170#define BN_DEC_NUM 19
171#define BN_HEX_FMT1 "%lX"
172#define BN_HEX_FMT2 "%016lX"
173#else
/* Otherwise: the word is an unsigned int of 4 bytes / 32 bits, and
 * BN_LLONG is defined with unsigned long long as BN_ULLONG. */
174#define BN_ULLONG unsigned long long
175#define BN_LLONG
176#define BN_ULONG unsigned int
177#define BN_LONG int
178#define BN_BITS 64
179#define BN_BYTES 4
180#define BN_BITS2 32
181#define BN_BITS4 16
182#define BN_MASK (0xffffffffffffffffLL)
183#define BN_MASK2 (0xffffffffL)
184#define BN_MASK2l (0xffff)
185#define BN_MASK2h1 (0xffff8000L)
186#define BN_MASK2h (0xffff0000L)
187#define BN_TBIT (0x80000000L)
188#define BN_DEC_CONV (1000000000L)
189#define BN_DEC_FMT1 "%u"
190#define BN_DEC_FMT2 "%09u"
191#define BN_DEC_NUM 9
192#define BN_HEX_FMT1 "%X"
193#define BN_HEX_FMT2 "%08X"
194#endif
195
196#define BN_FLG_MALLOCED 0x01
197#define BN_FLG_STATIC_DATA 0x02
198#define BN_FLG_CONSTTIME 0x04 /* avoid leaking exponent information through timing,
199 * BN_mod_exp_mont() will call BN_mod_exp_mont_consttime,
200 * BN_div() will call BN_div_no_branch,
201 * BN_mod_inverse() will call BN_mod_inverse_no_branch.
202 */
203
204void BN_set_flags(BIGNUM *b, int n);
205int BN_get_flags(const BIGNUM *b, int n);
206void BN_with_flags(BIGNUM *dest, const BIGNUM *src, int flags);
207
208/* Values for |top| in BN_rand() */
209#define BN_RAND_TOP_ANY -1
210#define BN_RAND_TOP_ONE 0
211#define BN_RAND_TOP_TWO 1
212
213/* Values for |bottom| in BN_rand() */
214#define BN_RAND_BOTTOM_ANY 0
215#define BN_RAND_BOTTOM_ODD 1
216
217BN_GENCB *BN_GENCB_new(void);
218void BN_GENCB_free(BN_GENCB *cb);
219
220/* Wrapper function to make using BN_GENCB easier. */
221int BN_GENCB_call(BN_GENCB *cb, int a, int b);
222
223/* Populate a BN_GENCB structure with an "old"-style callback */
224void BN_GENCB_set_old(BN_GENCB *gencb, void (*callback)(int, int, void *),
225 void *cb_arg);
226
227/* Populate a BN_GENCB structure with a "new"-style callback */
228void BN_GENCB_set(BN_GENCB *gencb, int (*callback)(int, int, BN_GENCB *),
229 void *cb_arg);
230
231void *BN_GENCB_get_arg(BN_GENCB *cb);
232
233#define BN_prime_checks 0 /* default: select number of iterations
234 based on the size of the number */
235
236/*
237 * BN_prime_checks_for_size() returns the number of Miller-Rabin
238 * iterations that will be done for checking that a random number
239 * is probably prime. The error rate for accepting a composite
240 * number as prime depends on the size of the prime |b|. The error
241 * rates used are for calculating an RSA key with 2 primes, and so
242 * the level is what you would expect for a key of double the size
243 * of the prime.
244 *
245 * This table is generated using the algorithm of FIPS PUB 186-4
246 * Digital Signature Standard (DSS), section F.1, page 117.
247 * (https://dx.doi.org/10.6028/NIST.FIPS.186-4)
248 *
249 * The following magma script was used to generate the output:
250 * securitybits:=125;
251 * k:=1024;
252 * for t:=1 to 65 do
253 * for M:=3 to Floor(2*Sqrt(k-1)-1) do
254 * S:=0;
255 * // Sum over m
256 * for m:=3 to M do
257 * s:=0;
258 * // Sum over j
259 * for j:=2 to m do
260 * s+:=(RealField(32)!2)^-(j+(k-1)/j);
261 * end for;
262 * S+:=2^(m-(m-1)*t)*s;
263 * end for;
264 * A:=2^(k-2-M*t);
265 * B:=8*(Pi(RealField(32))^2-6)/3*2^(k-2)*S;
266 * pkt:=2.00743*Log(2)*k*2^-k*(A+B);
267 * seclevel:=Floor(-Log(2,pkt));
268 * if seclevel ge securitybits then
269 * printf "k: %5o, security: %o bits (t: %o, M: %o)\n",k,seclevel,t,M;
270 * break;
271 * end if;
272 * end for;
273 * if seclevel ge securitybits then break; end if;
274 * end for;
275 *
276 * It can be run online at:
277 * http://magma.maths.usyd.edu.au/calc
278 *
279 * And will output:
280 * k: 1024, security: 129 bits (t: 6, M: 23)
281 *
282 * k is the number of bits of the prime, securitybits is the level
283 * we want to reach.
284 *
285 * prime length | RSA key size | # MR tests | security level
286 * -------------+--------------|------------+---------------
287 * (b) >= 6394 | >= 12788 | 3 | 256 bit
288 * (b) >= 3747 | >= 7494 | 3 | 192 bit
289 * (b) >= 1345 | >= 2690 | 4 | 128 bit
290 * (b) >= 1080 | >= 2160 | 5 | 128 bit
291 * (b) >= 852 | >= 1704 | 5 | 112 bit
292 * (b) >= 476 | >= 952 | 5 | 80 bit
293 * (b) >= 400 | >= 800 | 6 | 80 bit
294 * (b) >= 347 | >= 694 | 7 | 80 bit
295 * (b) >= 308 | >= 616 | 8 | 80 bit
296 * (b) >= 55 | >= 110 | 27 | 64 bit
297 * (b) >= 6 | >= 12 | 34 | 64 bit
298 */
299
/*
 * Number of Miller-Rabin iterations for a prime candidate of b bits. The
 * thresholds are tested from the largest size downwards; everything below
 * 55 bits (the table above assumes b >= 6) gets 34 iterations. Note that
 * the argument may be evaluated several times.
 */
#define BN_prime_checks_for_size(b)			\
	((b) >= 3747 ? 3 :				\
	    ((b) >= 1345 ? 4 :				\
		((b) >= 476 ? 5 :			\
		    ((b) >= 400 ? 6 :			\
			((b) >= 347 ? 7 :		\
			    ((b) >= 308 ? 8 :		\
				((b) >= 55 ? 27 : 34)))))))
308
309#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
310
311int BN_abs_is_word(const BIGNUM *a, const BN_ULONG w);
312int BN_is_zero(const BIGNUM *a);
313int BN_is_one(const BIGNUM *a);
314int BN_is_word(const BIGNUM *a, const BN_ULONG w);
315int BN_is_odd(const BIGNUM *a);
316
317void BN_zero(BIGNUM *a);
318int BN_one(BIGNUM *a);
319
320const BIGNUM *BN_value_one(void);
321BN_CTX *BN_CTX_new(void);
322void BN_CTX_free(BN_CTX *c);
323void BN_CTX_start(BN_CTX *ctx);
324BIGNUM *BN_CTX_get(BN_CTX *ctx);
325void BN_CTX_end(BN_CTX *ctx);
326int BN_rand(BIGNUM *rnd, int bits, int top, int bottom);
327int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom);
328int BN_rand_range(BIGNUM *rnd, const BIGNUM *range);
329int BN_pseudo_rand_range(BIGNUM *rnd, const BIGNUM *range);
330int BN_num_bits(const BIGNUM *a);
331int BN_num_bits_word(BN_ULONG);
332BIGNUM *BN_new(void);
333void BN_clear_free(BIGNUM *a);
334BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b);
335void BN_swap(BIGNUM *a, BIGNUM *b);
336BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret);
337int BN_bn2bin(const BIGNUM *a, unsigned char *to);
338int BN_bn2binpad(const BIGNUM *a, unsigned char *to, int tolen);
339BIGNUM *BN_lebin2bn(const unsigned char *s, int len, BIGNUM *ret);
340int BN_bn2lebinpad(const BIGNUM *a, unsigned char *to, int tolen);
341BIGNUM *BN_mpi2bn(const unsigned char *s, int len, BIGNUM *ret);
342int BN_bn2mpi(const BIGNUM *a, unsigned char *to);
343int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
344int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
345int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
346int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
347int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
348int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx);
349void BN_set_negative(BIGNUM *b, int n);
350
351int BN_is_negative(const BIGNUM *b);
352
353int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
354 BN_CTX *ctx);
355#define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx))
356
357int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx);
358int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
359int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
360int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
361int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
362int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
363 const BIGNUM *m, BN_CTX *ctx);
364int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
365int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
366int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m);
367int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx);
368int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m);
369
370BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w);
371BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w);
372int BN_mul_word(BIGNUM *a, BN_ULONG w);
373int BN_add_word(BIGNUM *a, BN_ULONG w);
374int BN_sub_word(BIGNUM *a, BN_ULONG w);
375int BN_set_word(BIGNUM *a, BN_ULONG w);
376BN_ULONG BN_get_word(const BIGNUM *a);
377
378int BN_cmp(const BIGNUM *a, const BIGNUM *b);
379void BN_free(BIGNUM *a);
380int BN_is_bit_set(const BIGNUM *a, int n);
381int BN_lshift(BIGNUM *r, const BIGNUM *a, int n);
382int BN_lshift1(BIGNUM *r, const BIGNUM *a);
383int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
384
385int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
386 const BIGNUM *m, BN_CTX *ctx);
387int BN_mod_exp_mont(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
388 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
389int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
390 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont);
391
392int BN_mask_bits(BIGNUM *a, int n);
393int BN_print_fp(FILE *fp, const BIGNUM *a);
394int BN_print(BIO *fp, const BIGNUM *a);
395int BN_rshift(BIGNUM *r, const BIGNUM *a, int n);
396int BN_rshift1(BIGNUM *r, const BIGNUM *a);
397void BN_clear(BIGNUM *a);
398BIGNUM *BN_dup(const BIGNUM *a);
399int BN_ucmp(const BIGNUM *a, const BIGNUM *b);
400int BN_set_bit(BIGNUM *a, int n);
401int BN_clear_bit(BIGNUM *a, int n);
402char * BN_bn2hex(const BIGNUM *a);
403char * BN_bn2dec(const BIGNUM *a);
404int BN_hex2bn(BIGNUM **a, const char *str);
405int BN_dec2bn(BIGNUM **a, const char *str);
406int BN_asc2bn(BIGNUM **a, const char *str);
407int BN_gcd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
408int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */
409BIGNUM *BN_mod_inverse(BIGNUM *ret,
410 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx);
411BIGNUM *BN_mod_sqrt(BIGNUM *ret,
412 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx);
413
414void BN_consttime_swap(BN_ULONG swap, BIGNUM *a, BIGNUM *b, int nwords);
415
416int BN_security_bits(int L, int N);
417
418int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe, const BIGNUM *add,
419 const BIGNUM *rem, BN_GENCB *cb);
420int BN_is_prime_ex(const BIGNUM *p, int nchecks, BN_CTX *ctx, BN_GENCB *cb);
421int BN_is_prime_fasttest_ex(const BIGNUM *p, int nchecks, BN_CTX *ctx,
422 int do_trial_division, BN_GENCB *cb);
423
424BN_MONT_CTX *BN_MONT_CTX_new(void);
425int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
426 BN_MONT_CTX *mont, BN_CTX *ctx);
427int BN_to_montgomery(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
428 BN_CTX *ctx);
429int BN_from_montgomery(BIGNUM *r, const BIGNUM *a,
430 BN_MONT_CTX *mont, BN_CTX *ctx);
431void BN_MONT_CTX_free(BN_MONT_CTX *mont);
432int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx);
433BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, const BN_MONT_CTX *from);
434BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
435 const BIGNUM *mod, BN_CTX *ctx);
436
437/* Primes from RFC 2409 */
438BIGNUM *BN_get_rfc2409_prime_768(BIGNUM *bn);
439BIGNUM *BN_get_rfc2409_prime_1024(BIGNUM *bn);
440
441/* Primes from RFC 3526 */
442BIGNUM *BN_get_rfc3526_prime_1536(BIGNUM *bn);
443BIGNUM *BN_get_rfc3526_prime_2048(BIGNUM *bn);
444BIGNUM *BN_get_rfc3526_prime_3072(BIGNUM *bn);
445BIGNUM *BN_get_rfc3526_prime_4096(BIGNUM *bn);
446BIGNUM *BN_get_rfc3526_prime_6144(BIGNUM *bn);
447BIGNUM *BN_get_rfc3526_prime_8192(BIGNUM *bn);
448
449void ERR_load_BN_strings(void);
450
451/* Error codes for the BN functions. */
452
453/* Function codes. */
454#define BN_F_BNRAND 127
455#define BN_F_BN_BLINDING_CONVERT_EX 100
456#define BN_F_BN_BLINDING_CREATE_PARAM 128
457#define BN_F_BN_BLINDING_INVERT_EX 101
458#define BN_F_BN_BLINDING_NEW 102
459#define BN_F_BN_BLINDING_UPDATE 103
460#define BN_F_BN_BN2DEC 104
461#define BN_F_BN_BN2HEX 105
462#define BN_F_BN_CTX_GET 116
463#define BN_F_BN_CTX_NEW 106
464#define BN_F_BN_CTX_START 129
465#define BN_F_BN_DIV 107
466#define BN_F_BN_DIV_NO_BRANCH 138
467#define BN_F_BN_DIV_RECP 130
468#define BN_F_BN_EXP 123
469#define BN_F_BN_EXPAND2 108
470#define BN_F_BN_GENERATE_PRIME_EX 140
471#define BN_F_BN_EXPAND_INTERNAL 120
472#define BN_F_BN_GF2M_MOD 131
473#define BN_F_BN_GF2M_MOD_EXP 132
474#define BN_F_BN_GF2M_MOD_MUL 133
475#define BN_F_BN_GF2M_MOD_SOLVE_QUAD 134
476#define BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR 135
477#define BN_F_BN_GF2M_MOD_SQR 136
478#define BN_F_BN_GF2M_MOD_SQRT 137
479#define BN_F_BN_MOD_EXP2_MONT 118
480#define BN_F_BN_MOD_EXP_MONT 109
481#define BN_F_BN_MOD_EXP_MONT_CONSTTIME 124
482#define BN_F_BN_MOD_EXP_MONT_WORD 117
483#define BN_F_BN_MOD_EXP_RECP 125
484#define BN_F_BN_MOD_EXP_SIMPLE 126
485#define BN_F_BN_MOD_INVERSE 110
486#define BN_F_BN_MOD_INVERSE_NO_BRANCH 139
487#define BN_F_BN_MOD_LSHIFT_QUICK 119
488#define BN_F_BN_MOD_MUL_RECIPROCAL 111
489#define BN_F_BN_MOD_SQRT 121
490#define BN_F_BN_MPI2BN 112
491#define BN_F_BN_NEW 113
492#define BN_F_BN_RAND 114
493#define BN_F_BN_RAND_RANGE 122
494#define BN_F_BN_USUB 115
495
496/* Reason codes. */
497#define BN_R_ARG2_LT_ARG3 100
498#define BN_R_BAD_RECIPROCAL 101
499#define BN_R_BIGNUM_TOO_LONG 114
500#define BN_R_BITS_TOO_SMALL 117
501#define BN_R_CALLED_WITH_EVEN_MODULUS 102
502#define BN_R_DIV_BY_ZERO 103
503#define BN_R_ENCODING_ERROR 104
504#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105
505#define BN_R_INPUT_NOT_REDUCED 110
506#define BN_R_INVALID_ARGUMENT 118
507#define BN_R_INVALID_LENGTH 106
508#define BN_R_INVALID_RANGE 115
509#define BN_R_NOT_A_SQUARE 111
510#define BN_R_NOT_INITIALIZED 107
511#define BN_R_NO_INVERSE 108
512#define BN_R_NO_SOLUTION 116
513#define BN_R_P_IS_NOT_PRIME 112
514#define BN_R_TOO_MANY_ITERATIONS 113
515#define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109
516
517#ifdef __cplusplus
518}
519#endif
520#endif
diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c
deleted file mode 100644
index 86768a312a..0000000000
--- a/src/lib/libcrypto/bn/bn_add.c
+++ /dev/null
@@ -1,341 +0,0 @@
1/* $OpenBSD: bn_add.c,v 1.26 2023/07/08 12:21:58 beck Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <assert.h>
60#include <limits.h>
61#include <stdio.h>
62
63#include <openssl/err.h>
64
65#include "bn_arch.h"
66#include "bn_local.h"
67#include "bn_internal.h"
68
69/*
70 * bn_add_words() computes (carry:r[i]) = a[i] + b[i] + carry, where a and b
71 * are both arrays of words. Any carry resulting from the addition is returned.
72 */
73#ifndef HAVE_BN_ADD_WORDS
74BN_ULONG
75bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
76{
77 BN_ULONG carry = 0;
78
79 assert(n >= 0);
80 if (n <= 0)
81 return 0;
82
83 while (n & ~3) {
84 bn_qwaddqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0],
85 carry, &carry, &r[3], &r[2], &r[1], &r[0]);
86 a += 4;
87 b += 4;
88 r += 4;
89 n -= 4;
90 }
91 while (n) {
92 bn_addw_addw(a[0], b[0], carry, &carry, &r[0]);
93 a++;
94 b++;
95 r++;
96 n--;
97 }
98 return carry;
99}
100#endif
101
102/*
103 * bn_add() computes (carry:r[i]) = a[i] + b[i] + carry, where a and b are both
104 * arrays of words (r may be the same as a or b). The length of a and b may
105 * differ, while r must be at least max(a_len, b_len) in length. Any carry
106 * resulting from the addition is returned.
107 */
108#ifndef HAVE_BN_ADD
109BN_ULONG
110bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
111 int b_len)
112{
113 int min_len, diff_len;
114 BN_ULONG carry = 0;
115
116 if ((min_len = a_len) > b_len)
117 min_len = b_len;
118
119 diff_len = a_len - b_len;
120
121 carry = bn_add_words(r, a, b, min_len);
122
123 a += min_len;
124 b += min_len;
125 r += min_len;
126
127 /* XXX - consider doing four at a time to match bn_add_words(). */
128 while (diff_len < 0) {
129 /* Compute r[0] = 0 + b[0] + carry. */
130 bn_addw(b[0], carry, &carry, &r[0]);
131 diff_len++;
132 b++;
133 r++;
134 }
135
136 /* XXX - consider doing four at a time to match bn_add_words(). */
137 while (diff_len > 0) {
138 /* Compute r[0] = a[0] + 0 + carry. */
139 bn_addw(a[0], carry, &carry, &r[0]);
140 diff_len--;
141 a++;
142 r++;
143 }
144
145 return carry;
146}
147#endif
148
149/*
150 * bn_sub_words() computes (borrow:r[i]) = a[i] - b[i] - borrow, where a and b
151 * are both arrays of words. Any borrow resulting from the subtraction is
152 * returned.
153 */
154#ifndef HAVE_BN_SUB_WORDS
155BN_ULONG
156bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
157{
158 BN_ULONG borrow = 0;
159
160 assert(n >= 0);
161 if (n <= 0)
162 return 0;
163
164 while (n & ~3) {
165 bn_qwsubqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0],
166 borrow, &borrow, &r[3], &r[2], &r[1], &r[0]);
167 a += 4;
168 b += 4;
169 r += 4;
170 n -= 4;
171 }
172 while (n) {
173 bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]);
174 a++;
175 b++;
176 r++;
177 n--;
178 }
179 return borrow;
180}
181#endif
182
183/*
184 * bn_sub() computes (borrow:r[i]) = a[i] - b[i] - borrow, where a and b are both
185 * arrays of words (r may be the same as a or b). The length of a and b may
186 * differ, while r must be at least max(a_len, b_len) in length. Any borrow
187 * resulting from the subtraction is returned.
188 */
189#ifndef HAVE_BN_SUB
190BN_ULONG
191bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
192 int b_len)
193{
194 int min_len, diff_len;
195 BN_ULONG borrow = 0;
196
197 if ((min_len = a_len) > b_len)
198 min_len = b_len;
199
200 diff_len = a_len - b_len;
201
202 borrow = bn_sub_words(r, a, b, min_len);
203
204 a += min_len;
205 b += min_len;
206 r += min_len;
207
208 /* XXX - consider doing four at a time to match bn_sub_words. */
209 while (diff_len < 0) {
210 /* Compute r[0] = 0 - b[0] - borrow. */
211 bn_subw(0 - b[0], borrow, &borrow, &r[0]);
212 diff_len++;
213 b++;
214 r++;
215 }
216
217 /* XXX - consider doing four at a time to match bn_sub_words. */
218 while (diff_len > 0) {
219 /* Compute r[0] = a[0] - 0 - borrow. */
220 bn_subw(a[0], borrow, &borrow, &r[0]);
221 diff_len--;
222 a++;
223 r++;
224 }
225
226 return borrow;
227}
228#endif
229
230int
231BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
232{
233 BN_ULONG carry;
234 int rn;
235
236 if ((rn = a->top) < b->top)
237 rn = b->top;
238 if (rn == INT_MAX)
239 return 0;
240 if (!bn_wexpand(r, rn + 1))
241 return 0;
242
243 carry = bn_add(r->d, rn, a->d, a->top, b->d, b->top);
244 r->d[rn] = carry;
245
246 r->top = rn + (carry & 1);
247 r->neg = 0;
248
249 return 1;
250}
251LCRYPTO_ALIAS(BN_uadd);
252
253int
254BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
255{
256 BN_ULONG borrow;
257 int rn;
258
259 if (a->top < b->top) {
260 BNerror(BN_R_ARG2_LT_ARG3);
261 return 0;
262 }
263 rn = a->top;
264
265 if (!bn_wexpand(r, rn))
266 return 0;
267
268 borrow = bn_sub(r->d, rn, a->d, a->top, b->d, b->top);
269 if (borrow > 0) {
270 BNerror(BN_R_ARG2_LT_ARG3);
271 return 0;
272 }
273
274 r->top = rn;
275 r->neg = 0;
276
277 bn_correct_top(r);
278
279 return 1;
280}
281LCRYPTO_ALIAS(BN_usub);
282
283int
284BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
285{
286 int ret, r_neg;
287
288 if (a->neg == b->neg) {
289 r_neg = a->neg;
290 ret = BN_uadd(r, a, b);
291 } else {
292 int cmp = BN_ucmp(a, b);
293
294 if (cmp > 0) {
295 r_neg = a->neg;
296 ret = BN_usub(r, a, b);
297 } else if (cmp < 0) {
298 r_neg = b->neg;
299 ret = BN_usub(r, b, a);
300 } else {
301 r_neg = 0;
302 BN_zero(r);
303 ret = 1;
304 }
305 }
306
307 BN_set_negative(r, r_neg);
308
309 return ret;
310}
311LCRYPTO_ALIAS(BN_add);
312
313int
314BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
315{
316 int ret, r_neg;
317
318 if (a->neg != b->neg) {
319 r_neg = a->neg;
320 ret = BN_uadd(r, a, b);
321 } else {
322 int cmp = BN_ucmp(a, b);
323
324 if (cmp > 0) {
325 r_neg = a->neg;
326 ret = BN_usub(r, a, b);
327 } else if (cmp < 0) {
328 r_neg = !b->neg;
329 ret = BN_usub(r, b, a);
330 } else {
331 r_neg = 0;
332 BN_zero(r);
333 ret = 1;
334 }
335 }
336
337 BN_set_negative(r, r_neg);
338
339 return ret;
340}
341LCRYPTO_ALIAS(BN_sub);
diff --git a/src/lib/libcrypto/bn/bn_bpsw.c b/src/lib/libcrypto/bn/bn_bpsw.c
deleted file mode 100644
index 04db17b527..0000000000
--- a/src/lib/libcrypto/bn/bn_bpsw.c
+++ /dev/null
@@ -1,531 +0,0 @@
1/* $OpenBSD: bn_bpsw.c,v 1.12 2025/02/13 11:10:01 tb Exp $ */
2/*
3 * Copyright (c) 2022 Martin Grenouilloux <martin.grenouilloux@lse.epita.fr>
4 * Copyright (c) 2022 Theo Buehler <tb@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <openssl/bn.h>
20
21#include "bn_local.h"
22#include "bn_prime.h"
23
24/*
25 * For an odd n compute a / 2 (mod n). If a is even, we can do a plain
26 * division, otherwise calculate (a + n) / 2. Then reduce (mod n).
27 */
28
29static int
30bn_div_by_two_mod_odd_n(BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
31{
32 if (!BN_is_odd(n))
33 return 0;
34
35 if (BN_is_odd(a)) {
36 if (!BN_add(a, a, n))
37 return 0;
38 }
39 if (!BN_rshift1(a, a))
40 return 0;
41 if (!BN_mod_ct(a, a, n, ctx))
42 return 0;
43
44 return 1;
45}
46
47/*
48 * Given the next binary digit of k and the current Lucas terms U and V, this
49 * helper computes the next terms in the Lucas sequence defined as follows:
50 *
51 * U' = U * V (mod n)
52 * V' = (V^2 + D * U^2) / 2 (mod n)
53 *
54 * If digit == 0, bn_lucas_step() returns U' and V'. If digit == 1, it returns
55 *
56 * U'' = (U' + V') / 2 (mod n)
57 * V'' = (V' + D * U') / 2 (mod n)
58 *
59 * Compare with FIPS 186-4, Appendix C.3.3, step 6.
60 */
61
62static int
63bn_lucas_step(BIGNUM *U, BIGNUM *V, int digit, const BIGNUM *D,
64 const BIGNUM *n, BN_CTX *ctx)
65{
66 BIGNUM *tmp;
67 int ret = 0;
68
69 BN_CTX_start(ctx);
70
71 if ((tmp = BN_CTX_get(ctx)) == NULL)
72 goto err;
73
74 /* Calculate D * U^2 before computing U'. */
75 if (!BN_sqr(tmp, U, ctx))
76 goto err;
77 if (!BN_mul(tmp, D, tmp, ctx))
78 goto err;
79
80 /* U' = U * V (mod n). */
81 if (!BN_mod_mul(U, U, V, n, ctx))
82 goto err;
83
84 /* V' = (V^2 + D * U^2) / 2 (mod n). */
85 if (!BN_sqr(V, V, ctx))
86 goto err;
87 if (!BN_add(V, V, tmp))
88 goto err;
89 if (!bn_div_by_two_mod_odd_n(V, n, ctx))
90 goto err;
91
92 if (digit == 1) {
93 /* Calculate D * U' before computing U''. */
94 if (!BN_mul(tmp, D, U, ctx))
95 goto err;
96
97 /* U'' = (U' + V') / 2 (mod n). */
98 if (!BN_add(U, U, V))
99 goto err;
100 if (!bn_div_by_two_mod_odd_n(U, n, ctx))
101 goto err;
102
103 /* V'' = (V' + D * U') / 2 (mod n). */
104 if (!BN_add(V, V, tmp))
105 goto err;
106 if (!bn_div_by_two_mod_odd_n(V, n, ctx))
107 goto err;
108 }
109
110 ret = 1;
111
112 err:
113 BN_CTX_end(ctx);
114
115 return ret;
116}
117
118/*
119 * Compute the Lucas terms U_k, V_k, see FIPS 186-4, Appendix C.3.3, steps 4-6.
120 */
121
122static int
123bn_lucas(BIGNUM *U, BIGNUM *V, const BIGNUM *k, const BIGNUM *D,
124 const BIGNUM *n, BN_CTX *ctx)
125{
126 int digit, i;
127 int ret = 0;
128
129 if (!BN_one(U))
130 goto err;
131 if (!BN_one(V))
132 goto err;
133
134 /*
135 * Iterate over the digits of k from MSB to LSB. Start at digit 2
136 * since the first digit is dealt with by setting U = 1 and V = 1.
137 */
138
139 for (i = BN_num_bits(k) - 2; i >= 0; i--) {
140 digit = BN_is_bit_set(k, i);
141
142 if (!bn_lucas_step(U, V, digit, D, n, ctx))
143 goto err;
144 }
145
146 ret = 1;
147
148 err:
149 return ret;
150}
151
152/*
153 * This is a stronger variant of the Lucas test in FIPS 186-4, Appendix C.3.3.
154 * Every strong Lucas pseudoprime n is also a Lucas pseudoprime since
155 * U_{n+1} == 0 follows from U_k == 0 or V_{k * 2^r} == 0 for 0 <= r < s.
156 */
157
158static int
159bn_strong_lucas_test(int *is_pseudoprime, const BIGNUM *n, const BIGNUM *D,
160 BN_CTX *ctx)
161{
162 BIGNUM *k, *U, *V;
163 int r, s;
164 int ret = 0;
165
166 BN_CTX_start(ctx);
167
168 if ((k = BN_CTX_get(ctx)) == NULL)
169 goto err;
170 if ((U = BN_CTX_get(ctx)) == NULL)
171 goto err;
172 if ((V = BN_CTX_get(ctx)) == NULL)
173 goto err;
174
175 /*
176 * Factorize n + 1 = k * 2^s with odd k: shift away the s trailing ones
177 * of n and set the lowest bit of the resulting number k.
178 */
179
180 s = 0;
181 while (BN_is_bit_set(n, s))
182 s++;
183 if (!BN_rshift(k, n, s))
184 goto err;
185 if (!BN_set_bit(k, 0))
186 goto err;
187
188 /*
189 * Calculate the Lucas terms U_k and V_k. If either of them is zero,
190 * then n is a strong Lucas pseudoprime.
191 */
192
193 if (!bn_lucas(U, V, k, D, n, ctx))
194 goto err;
195
196 if (BN_is_zero(U) || BN_is_zero(V)) {
197 *is_pseudoprime = 1;
198 goto done;
199 }
200
201 /*
202 * Calculate the Lucas terms U_{k * 2^r}, V_{k * 2^r} for 1 <= r < s.
203 * If any V_{k * 2^r} is zero then n is a strong Lucas pseudoprime.
204 */
205
206 for (r = 1; r < s; r++) {
207 if (!bn_lucas_step(U, V, 0, D, n, ctx))
208 goto err;
209
210 if (BN_is_zero(V)) {
211 *is_pseudoprime = 1;
212 goto done;
213 }
214 }
215
216 /*
217 * If we got here, n is definitely composite.
218 */
219
220 *is_pseudoprime = 0;
221
222 done:
223 ret = 1;
224
225 err:
226 BN_CTX_end(ctx);
227
228 return ret;
229}
230
231/*
232 * Test n for primality using the strong Lucas test with Selfridge's Method A.
233 * Returns 1 if n is prime or a strong Lucas-Selfridge pseudoprime.
234 * If it returns 0 then n is definitely composite.
235 */
236
237static int
238bn_strong_lucas_selfridge(int *is_pseudoprime, const BIGNUM *n, BN_CTX *ctx)
239{
240 BIGNUM *D, *two;
241 int is_perfect_square, jacobi_symbol, sign;
242 int ret = 0;
243
244 BN_CTX_start(ctx);
245
246 /* If n is a perfect square, it is composite. */
247 if (!bn_is_perfect_square(&is_perfect_square, n, ctx))
248 goto err;
249 if (is_perfect_square) {
250 *is_pseudoprime = 0;
251 goto done;
252 }
253
254 /*
255 * Find the first D in the Selfridge sequence 5, -7, 9, -11, 13, ...
256 * such that the Jacobi symbol (D/n) is -1.
257 */
258
259 if ((D = BN_CTX_get(ctx)) == NULL)
260 goto err;
261 if ((two = BN_CTX_get(ctx)) == NULL)
262 goto err;
263
264 sign = 1;
265 if (!BN_set_word(D, 5))
266 goto err;
267 if (!BN_set_word(two, 2))
268 goto err;
269
270 while (1) {
271 /* For odd n the Kronecker symbol computes the Jacobi symbol. */
272 if ((jacobi_symbol = BN_kronecker(D, n, ctx)) == -2)
273 goto err;
274
275 /* We found the value for D. */
276 if (jacobi_symbol == -1)
277 break;
278
279 /* n and D have prime factors in common. */
280 if (jacobi_symbol == 0) {
281 *is_pseudoprime = 0;
282 goto done;
283 }
284
285 sign = -sign;
286 if (!BN_uadd(D, D, two))
287 goto err;
288 BN_set_negative(D, sign == -1);
289 }
290
291 if (!bn_strong_lucas_test(is_pseudoprime, n, D, ctx))
292 goto err;
293
294 done:
295 ret = 1;
296
297 err:
298 BN_CTX_end(ctx);
299
300 return ret;
301}
302
303/*
304 * Fermat criterion in Miller-Rabin test.
305 *
306 * Check whether 1 < base < n - 1 witnesses that n is composite. For prime n:
307 *
308 * * Fermat's little theorem: base^(n-1) = 1 (mod n).
309 * * The only square roots of 1 (mod n) are 1 and -1.
310 *
311 * Calculate base^((n-1)/2) by writing n - 1 = k * 2^s with odd k. Iteratively
312 * compute power = (base^k)^(2^(s-1)) by successive squaring of base^k.
313 *
314 * If power ever reaches -1, base^(n-1) is equal to 1 and n is a pseudoprime
315 * for base. If power reaches 1 before -1 during successive squaring, we have
316 * an unexpected square root of 1 and n is composite. Otherwise base^(n-1) != 1,
317 * and n is composite.
318 */
319
320static int
321bn_fermat(int *is_pseudoprime, const BIGNUM *n, const BIGNUM *n_minus_one,
322 const BIGNUM *k, int s, const BIGNUM *base, BN_CTX *ctx, BN_MONT_CTX *mctx)
323{
324 BIGNUM *power;
325 int ret = 0;
326 int i;
327
328 BN_CTX_start(ctx);
329
330 if ((power = BN_CTX_get(ctx)) == NULL)
331 goto err;
332
333 /* Sanity check: ensure that 1 < base < n - 1. */
334 if (BN_cmp(base, BN_value_one()) <= 0 || BN_cmp(base, n_minus_one) >= 0)
335 goto err;
336
337 if (!BN_mod_exp_mont_ct(power, base, k, n, ctx, mctx))
338 goto err;
339
340 if (BN_is_one(power) || BN_cmp(power, n_minus_one) == 0) {
341 *is_pseudoprime = 1;
342 goto done;
343 }
344
345 /* Loop invariant: power is neither 1 nor -1 (mod n). */
346 for (i = 1; i < s; i++) {
347 if (!BN_mod_sqr(power, power, n, ctx))
348 goto err;
349
350 /* n is a pseudoprime for base. */
351 if (BN_cmp(power, n_minus_one) == 0) {
352 *is_pseudoprime = 1;
353 goto done;
354 }
355
356 /* n is composite: there's a square root of unity != 1 or -1. */
357 if (BN_is_one(power)) {
358 *is_pseudoprime = 0;
359 goto done;
360 }
361 }
362
363 /*
364 * If we get here, n is definitely composite: base^(n-1) != 1.
365 */
366
367 *is_pseudoprime = 0;
368
369 done:
370 ret = 1;
371
372 err:
373 BN_CTX_end(ctx);
374
375 return ret;
376}
377
378/*
379 * Miller-Rabin primality test for base 2 and for |rounds| of random bases.
380 * On success: is_pseudoprime == 0 implies that n is composite.
381 */
382
383static int
384bn_miller_rabin(int *is_pseudoprime, const BIGNUM *n, BN_CTX *ctx,
385 size_t rounds)
386{
387 BN_MONT_CTX *mctx = NULL;
388 BIGNUM *base, *k, *n_minus_one;
389 size_t i;
390 int s;
391 int ret = 0;
392
393 BN_CTX_start(ctx);
394
395 if ((base = BN_CTX_get(ctx)) == NULL)
396 goto err;
397 if ((k = BN_CTX_get(ctx)) == NULL)
398 goto err;
399 if ((n_minus_one = BN_CTX_get(ctx)) == NULL)
400 goto err;
401
402 if (BN_is_word(n, 2) || BN_is_word(n, 3)) {
403 *is_pseudoprime = 1;
404 goto done;
405 }
406
407 if (BN_cmp(n, BN_value_one()) <= 0 || !BN_is_odd(n)) {
408 *is_pseudoprime = 0;
409 goto done;
410 }
411
412 if (!BN_sub(n_minus_one, n, BN_value_one()))
413 goto err;
414
415 /*
416 * Factorize n - 1 = k * 2^s.
417 */
418
419 s = 0;
420 while (!BN_is_bit_set(n_minus_one, s))
421 s++;
422 if (!BN_rshift(k, n_minus_one, s))
423 goto err;
424
425 /*
426 * Montgomery setup for n.
427 */
428
429 if ((mctx = BN_MONT_CTX_create(n, ctx)) == NULL)
430 goto err;
431
432 /*
433 * Perform a Miller-Rabin test for base 2 as required by BPSW.
434 */
435
436 if (!BN_set_word(base, 2))
437 goto err;
438
439 if (!bn_fermat(is_pseudoprime, n, n_minus_one, k, s, base, ctx, mctx))
440 goto err;
441 if (!*is_pseudoprime)
442 goto done;
443
444 /*
445 * Perform Miller-Rabin tests with random 3 <= base < n - 1 to reduce
446 * risk of false positives in BPSW.
447 */
448
449 for (i = 0; i < rounds; i++) {
450 if (!bn_rand_interval(base, 3, n_minus_one))
451 goto err;
452
453 if (!bn_fermat(is_pseudoprime, n, n_minus_one, k, s, base, ctx,
454 mctx))
455 goto err;
456 if (!*is_pseudoprime)
457 goto done;
458 }
459
460 /*
461 * If we got here, we have a Miller-Rabin pseudoprime.
462 */
463
464 *is_pseudoprime = 1;
465
466 done:
467 ret = 1;
468
469 err:
470 BN_MONT_CTX_free(mctx);
471 BN_CTX_end(ctx);
472
473 return ret;
474}
475
476/*
477 * The Baillie-Pomerance-Selfridge-Wagstaff algorithm combines a Miller-Rabin
478 * test for base 2 with a Strong Lucas pseudoprime test.
479 */
480
481int
482bn_is_prime_bpsw(int *is_pseudoprime, const BIGNUM *n, BN_CTX *in_ctx,
483 size_t rounds)
484{
485 BN_CTX *ctx = NULL;
486 BN_ULONG mod;
487 int i;
488 int ret = 0;
489
490 if (BN_is_word(n, 2)) {
491 *is_pseudoprime = 1;
492 goto done;
493 }
494
495 if (BN_cmp(n, BN_value_one()) <= 0 || !BN_is_odd(n)) {
496 *is_pseudoprime = 0;
497 goto done;
498 }
499
500 /* Trial divisions with the first 2048 primes. */
501 for (i = 0; i < NUMPRIMES; i++) {
502 if ((mod = BN_mod_word(n, primes[i])) == (BN_ULONG)-1)
503 goto err;
504 if (mod == 0) {
505 *is_pseudoprime = BN_is_word(n, primes[i]);
506 goto done;
507 }
508 }
509
510 if ((ctx = in_ctx) == NULL)
511 ctx = BN_CTX_new();
512 if (ctx == NULL)
513 goto err;
514
515 if (!bn_miller_rabin(is_pseudoprime, n, ctx, rounds))
516 goto err;
517 if (!*is_pseudoprime)
518 goto done;
519
520 if (!bn_strong_lucas_selfridge(is_pseudoprime, n, ctx))
521 goto err;
522
523 done:
524 ret = 1;
525
526 err:
527 if (ctx != in_ctx)
528 BN_CTX_free(ctx);
529
530 return ret;
531}
diff --git a/src/lib/libcrypto/bn/bn_const.c b/src/lib/libcrypto/bn/bn_const.c
deleted file mode 100644
index bf684c8a46..0000000000
--- a/src/lib/libcrypto/bn/bn_const.c
+++ /dev/null
@@ -1,433 +0,0 @@
1/* $OpenBSD: bn_const.c,v 1.8 2023/07/28 10:07:30 tb Exp $ */
2/* Insert boilerplate */
3
4#include <openssl/bn.h>
5
6/*
7 * "First Oakley Default Group" from RFC2409, section 6.1.
8 *
9 * The prime is: 2^768 - 2^704 - 1 + 2^64 * { [2^638 pi] + 149686 }
10 *
11 * RFC2409 specifies a generator of 2.
12 * RFC2412 specifies a generator of 22.
13 */
14
15static const unsigned char RFC2409_PRIME_768[] = {
16 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2,
17 0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
18 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6,
19 0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
20 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D,
21 0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
22 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9,
23 0xA6, 0x3A, 0x36, 0x20, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24};
25
26BIGNUM *
27BN_get_rfc2409_prime_768(BIGNUM *bn)
28{
29 return BN_bin2bn(RFC2409_PRIME_768, sizeof(RFC2409_PRIME_768), bn);
30}
31LCRYPTO_ALIAS(BN_get_rfc2409_prime_768);
32
33/*
34 * "Second Oakley Default Group" from RFC2409, section 6.2.
35 *
36 * The prime is: 2^1024 - 2^960 - 1 + 2^64 * { [2^894 pi] + 129093 }.
37 *
38 * RFC2409 specifies a generator of 2.
39 * RFC2412 specifies a generator of 22.
40 */
41
42static const unsigned char RFC2409_PRIME_1024[] = {
43 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2,
44 0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
45 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6,
46 0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
47 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D,
48 0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
49 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9,
50 0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
51 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11,
52 0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE6, 0x53, 0x81,
53 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
54};
55
56BIGNUM *
57BN_get_rfc2409_prime_1024(BIGNUM *bn)
58{
59 return BN_bin2bn(RFC2409_PRIME_1024, sizeof(RFC2409_PRIME_1024), bn);
60}
61LCRYPTO_ALIAS(BN_get_rfc2409_prime_1024);
62
63/*
64 * "1536-bit MODP Group" from RFC3526, Section 2.
65 *
66 * The prime is: 2^1536 - 2^1472 - 1 + 2^64 * { [2^1406 pi] + 741804 }
67 *
68 * RFC3526 specifies a generator of 2.
69 * RFC2412 specifies a generator of 22.
70 */
71
72static const unsigned char RFC3526_PRIME_1536[] = {
73 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2,
74 0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
75 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6,
76 0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
77 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D,
78 0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
79 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9,
80 0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
81 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11,
82 0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
83 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, 0x98, 0xDA, 0x48, 0x36,
84 0x1C, 0x55, 0xD3, 0x9A, 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
85 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, 0x1C, 0x62, 0xF3, 0x56,
86 0x20, 0x85, 0x52, 0xBB, 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
87 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, 0xF1, 0x74, 0x6C, 0x08,
88 0xCA, 0x23, 0x73, 0x27, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
89};
90
91BIGNUM *
92BN_get_rfc3526_prime_1536(BIGNUM *bn)
93{
94 return BN_bin2bn(RFC3526_PRIME_1536, sizeof(RFC3526_PRIME_1536), bn);
95}
96LCRYPTO_ALIAS(BN_get_rfc3526_prime_1536);
97
98/*
99 * "2048-bit MODP Group" from RFC3526, Section 3.
100 *
101 * The prime is: 2^2048 - 2^1984 - 1 + 2^64 * { [2^1918 pi] + 124476 }
102 *
103 * RFC3526 specifies a generator of 2.
104 */
105
106static const unsigned char RFC3526_PRIME_2048[] = {
107 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2,
108 0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
109 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6,
110 0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
111 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D,
112 0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
113 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9,
114 0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
115 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11,
116 0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
117 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, 0x98, 0xDA, 0x48, 0x36,
118 0x1C, 0x55, 0xD3, 0x9A, 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
119 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, 0x1C, 0x62, 0xF3, 0x56,
120 0x20, 0x85, 0x52, 0xBB, 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
121 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, 0xF1, 0x74, 0x6C, 0x08,
122 0xCA, 0x18, 0x21, 0x7C, 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
123 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, 0x9B, 0x27, 0x83, 0xA2,
124 0xEC, 0x07, 0xA2, 0x8F, 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
125 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, 0x39, 0x95, 0x49, 0x7C,
126 0xEA, 0x95, 0x6A, 0xE5, 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
127 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAC, 0xAA, 0x68, 0xFF, 0xFF, 0xFF, 0xFF,
128 0xFF, 0xFF, 0xFF, 0xFF,
129};
130
131BIGNUM *
132BN_get_rfc3526_prime_2048(BIGNUM *bn)
133{
134 return BN_bin2bn(RFC3526_PRIME_2048, sizeof(RFC3526_PRIME_2048), bn);
135}
136LCRYPTO_ALIAS(BN_get_rfc3526_prime_2048);
137
138/*
139 * "3072-bit MODP Group" from RFC3526, Section 4.
140 *
141 * The prime is: 2^3072 - 2^3008 - 1 + 2^64 * { [2^2942 pi] + 1690314 }
142 *
143 * RFC3526 specifies a generator of 2.
144 */
145
146static const unsigned char RFC3526_PRIME_3072[] = {
147 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2,
148 0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
149 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6,
150 0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
151 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D,
152 0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
153 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9,
154 0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
155 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11,
156 0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
157 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, 0x98, 0xDA, 0x48, 0x36,
158 0x1C, 0x55, 0xD3, 0x9A, 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
159 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, 0x1C, 0x62, 0xF3, 0x56,
160 0x20, 0x85, 0x52, 0xBB, 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
161 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, 0xF1, 0x74, 0x6C, 0x08,
162 0xCA, 0x18, 0x21, 0x7C, 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
163 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, 0x9B, 0x27, 0x83, 0xA2,
164 0xEC, 0x07, 0xA2, 0x8F, 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
165 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, 0x39, 0x95, 0x49, 0x7C,
166 0xEA, 0x95, 0x6A, 0xE5, 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
167 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, 0xAD, 0x33, 0x17, 0x0D,
168 0x04, 0x50, 0x7A, 0x33, 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
169 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, 0x8A, 0xEA, 0x71, 0x57,
170 0x5D, 0x06, 0x0C, 0x7D, 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
171 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, 0x1E, 0x8C, 0x94, 0xE0,
172 0x4A, 0x25, 0x61, 0x9D, 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
173 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, 0xD8, 0x76, 0x02, 0x73,
174 0x3E, 0xC8, 0x6A, 0x64, 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
175 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, 0x77, 0x09, 0x88, 0xC0,
176 0xBA, 0xD9, 0x46, 0xE2, 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
177 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, 0x4B, 0x82, 0xD1, 0x20,
178 0xA9, 0x3A, 0xD2, 0xCA, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
179};
180
181BIGNUM *
182BN_get_rfc3526_prime_3072(BIGNUM *bn)
183{
184 return BN_bin2bn(RFC3526_PRIME_3072, sizeof(RFC3526_PRIME_3072), bn);
185}
186LCRYPTO_ALIAS(BN_get_rfc3526_prime_3072);
187
188/*
189 * "4096-bit MODP Group" from RFC3526, Section 5.
190 *
191 * The prime is: 2^4096 - 2^4032 - 1 + 2^64 * { [2^3966 pi] + 240904 }
192 *
193 * RFC3526 specifies a generator of 2.
194 */
195
196static const unsigned char RFC3526_PRIME_4096[] = {
197 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2,
198 0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
199 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6,
200 0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
201 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D,
202 0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
203 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9,
204 0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
205 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11,
206 0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
207 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, 0x98, 0xDA, 0x48, 0x36,
208 0x1C, 0x55, 0xD3, 0x9A, 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
209 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, 0x1C, 0x62, 0xF3, 0x56,
210 0x20, 0x85, 0x52, 0xBB, 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
211 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, 0xF1, 0x74, 0x6C, 0x08,
212 0xCA, 0x18, 0x21, 0x7C, 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
213 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, 0x9B, 0x27, 0x83, 0xA2,
214 0xEC, 0x07, 0xA2, 0x8F, 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
215 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, 0x39, 0x95, 0x49, 0x7C,
216 0xEA, 0x95, 0x6A, 0xE5, 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
217 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, 0xAD, 0x33, 0x17, 0x0D,
218 0x04, 0x50, 0x7A, 0x33, 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
219 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, 0x8A, 0xEA, 0x71, 0x57,
220 0x5D, 0x06, 0x0C, 0x7D, 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
221 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, 0x1E, 0x8C, 0x94, 0xE0,
222 0x4A, 0x25, 0x61, 0x9D, 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
223 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, 0xD8, 0x76, 0x02, 0x73,
224 0x3E, 0xC8, 0x6A, 0x64, 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
225 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, 0x77, 0x09, 0x88, 0xC0,
226 0xBA, 0xD9, 0x46, 0xE2, 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
227 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, 0x4B, 0x82, 0xD1, 0x20,
228 0xA9, 0x21, 0x08, 0x01, 0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
229 0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26, 0x99, 0xC3, 0x27, 0x18,
230 0x6A, 0xF4, 0xE2, 0x3C, 0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
231 0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8, 0xDB, 0xBB, 0xC2, 0xDB,
232 0x04, 0xDE, 0x8E, 0xF9, 0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
233 0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D, 0x99, 0xB2, 0x96, 0x4F,
234 0xA0, 0x90, 0xC3, 0xA2, 0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
235 0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF, 0xB8, 0x1B, 0xDD, 0x76,
236 0x21, 0x70, 0x48, 0x1C, 0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
237 0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1, 0x86, 0xFF, 0xB7, 0xDC,
238 0x90, 0xA6, 0xC0, 0x8F, 0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x06, 0x31, 0x99,
239 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
240};
241
242BIGNUM *
243BN_get_rfc3526_prime_4096(BIGNUM *bn)
244{
245 return BN_bin2bn(RFC3526_PRIME_4096, sizeof(RFC3526_PRIME_4096), bn);
246}
247LCRYPTO_ALIAS(BN_get_rfc3526_prime_4096);
248
249/*
250 * "6144-bit MODP Group" from RFC3526, Section 6.
251 *
252 * The prime is: 2^6144 - 2^6080 - 1 + 2^64 * { [2^6014 pi] + 929484 }
253 *
254 * RFC3526 specifies a generator of 2.
255 */
256
257static const unsigned char RFC3526_PRIME_6144[] = {
258 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2,
259 0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
260 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6,
261 0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
262 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D,
263 0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
264 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9,
265 0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
266 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11,
267 0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
268 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, 0x98, 0xDA, 0x48, 0x36,
269 0x1C, 0x55, 0xD3, 0x9A, 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
270 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, 0x1C, 0x62, 0xF3, 0x56,
271 0x20, 0x85, 0x52, 0xBB, 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
272 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, 0xF1, 0x74, 0x6C, 0x08,
273 0xCA, 0x18, 0x21, 0x7C, 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
274 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, 0x9B, 0x27, 0x83, 0xA2,
275 0xEC, 0x07, 0xA2, 0x8F, 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
276 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, 0x39, 0x95, 0x49, 0x7C,
277 0xEA, 0x95, 0x6A, 0xE5, 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
278 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, 0xAD, 0x33, 0x17, 0x0D,
279 0x04, 0x50, 0x7A, 0x33, 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
280 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, 0x8A, 0xEA, 0x71, 0x57,
281 0x5D, 0x06, 0x0C, 0x7D, 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
282 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, 0x1E, 0x8C, 0x94, 0xE0,
283 0x4A, 0x25, 0x61, 0x9D, 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
284 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, 0xD8, 0x76, 0x02, 0x73,
285 0x3E, 0xC8, 0x6A, 0x64, 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
286 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, 0x77, 0x09, 0x88, 0xC0,
287 0xBA, 0xD9, 0x46, 0xE2, 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
288 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, 0x4B, 0x82, 0xD1, 0x20,
289 0xA9, 0x21, 0x08, 0x01, 0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
290 0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26, 0x99, 0xC3, 0x27, 0x18,
291 0x6A, 0xF4, 0xE2, 0x3C, 0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
292 0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8, 0xDB, 0xBB, 0xC2, 0xDB,
293 0x04, 0xDE, 0x8E, 0xF9, 0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
294 0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D, 0x99, 0xB2, 0x96, 0x4F,
295 0xA0, 0x90, 0xC3, 0xA2, 0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
296 0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF, 0xB8, 0x1B, 0xDD, 0x76,
297 0x21, 0x70, 0x48, 0x1C, 0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
298 0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1, 0x86, 0xFF, 0xB7, 0xDC,
299 0x90, 0xA6, 0xC0, 0x8F, 0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92,
300 0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26, 0xC1, 0xD4, 0xDC, 0xB2,
301 0x60, 0x26, 0x46, 0xDE, 0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD,
302 0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E, 0xE5, 0xDB, 0x38, 0x2F,
303 0x41, 0x30, 0x01, 0xAE, 0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31,
304 0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18, 0xDA, 0x3E, 0xDB, 0xEB,
305 0xCF, 0x9B, 0x14, 0xED, 0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 0x1B,
306 0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B, 0x33, 0x20, 0x51, 0x51,
307 0x2B, 0xD7, 0xAF, 0x42, 0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF,
308 0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC, 0xF0, 0x32, 0xEA, 0x15,
309 0xD1, 0x72, 0x1D, 0x03, 0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6,
310 0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82, 0xB5, 0xA8, 0x40, 0x31,
311 0x90, 0x0B, 0x1C, 0x9E, 0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3,
312 0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE, 0x0F, 0x1D, 0x45, 0xB7,
313 0xFF, 0x58, 0x5A, 0xC5, 0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA,
314 0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8, 0x14, 0xCC, 0x5E, 0xD2,
315 0x0F, 0x80, 0x37, 0xE0, 0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28,
316 0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76, 0xF5, 0x50, 0xAA, 0x3D,
317 0x8A, 0x1F, 0xBF, 0xF0, 0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C,
318 0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32, 0x38, 0x7F, 0xE8, 0xD7,
319 0x6E, 0x3C, 0x04, 0x68, 0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE,
320 0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6, 0xE6, 0x94, 0xF9, 0x1E,
321 0x6D, 0xCC, 0x40, 0x24, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
322};
323
324BIGNUM *
325BN_get_rfc3526_prime_6144(BIGNUM *bn)
326{
327 return BN_bin2bn(RFC3526_PRIME_6144, sizeof(RFC3526_PRIME_6144), bn);
328}
329LCRYPTO_ALIAS(BN_get_rfc3526_prime_6144);
330
331/*
332 * "8192-bit MODP Group" from RFC3526, Section 7.
333 *
334 * The prime is: 2^8192 - 2^8128 - 1 + 2^64 * { [2^8062 pi] + 4743158 }
335 *
336 * RFC3526 specifies a generator of 2.
337 */
338
339static const unsigned char RFC3526_PRIME_8192[] = {
340 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2,
341 0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
342 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6,
343 0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
344 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D,
345 0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
346 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9,
347 0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
348 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11,
349 0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
350 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, 0x98, 0xDA, 0x48, 0x36,
351 0x1C, 0x55, 0xD3, 0x9A, 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
352 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, 0x1C, 0x62, 0xF3, 0x56,
353 0x20, 0x85, 0x52, 0xBB, 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
354 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, 0xF1, 0x74, 0x6C, 0x08,
355 0xCA, 0x18, 0x21, 0x7C, 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
356 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, 0x9B, 0x27, 0x83, 0xA2,
357 0xEC, 0x07, 0xA2, 0x8F, 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
358 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, 0x39, 0x95, 0x49, 0x7C,
359 0xEA, 0x95, 0x6A, 0xE5, 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
360 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, 0xAD, 0x33, 0x17, 0x0D,
361 0x04, 0x50, 0x7A, 0x33, 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
362 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, 0x8A, 0xEA, 0x71, 0x57,
363 0x5D, 0x06, 0x0C, 0x7D, 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
364 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, 0x1E, 0x8C, 0x94, 0xE0,
365 0x4A, 0x25, 0x61, 0x9D, 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
366 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, 0xD8, 0x76, 0x02, 0x73,
367 0x3E, 0xC8, 0x6A, 0x64, 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
368 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, 0x77, 0x09, 0x88, 0xC0,
369 0xBA, 0xD9, 0x46, 0xE2, 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
370 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, 0x4B, 0x82, 0xD1, 0x20,
371 0xA9, 0x21, 0x08, 0x01, 0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
372 0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26, 0x99, 0xC3, 0x27, 0x18,
373 0x6A, 0xF4, 0xE2, 0x3C, 0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
374 0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8, 0xDB, 0xBB, 0xC2, 0xDB,
375 0x04, 0xDE, 0x8E, 0xF9, 0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
376 0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D, 0x99, 0xB2, 0x96, 0x4F,
377 0xA0, 0x90, 0xC3, 0xA2, 0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
378 0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF, 0xB8, 0x1B, 0xDD, 0x76,
379 0x21, 0x70, 0x48, 0x1C, 0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
380 0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1, 0x86, 0xFF, 0xB7, 0xDC,
381 0x90, 0xA6, 0xC0, 0x8F, 0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92,
382 0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26, 0xC1, 0xD4, 0xDC, 0xB2,
383 0x60, 0x26, 0x46, 0xDE, 0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD,
384 0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E, 0xE5, 0xDB, 0x38, 0x2F,
385 0x41, 0x30, 0x01, 0xAE, 0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31,
386 0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18, 0xDA, 0x3E, 0xDB, 0xEB,
387 0xCF, 0x9B, 0x14, 0xED, 0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 0x1B,
388 0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B, 0x33, 0x20, 0x51, 0x51,
389 0x2B, 0xD7, 0xAF, 0x42, 0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF,
390 0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC, 0xF0, 0x32, 0xEA, 0x15,
391 0xD1, 0x72, 0x1D, 0x03, 0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6,
392 0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82, 0xB5, 0xA8, 0x40, 0x31,
393 0x90, 0x0B, 0x1C, 0x9E, 0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3,
394 0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE, 0x0F, 0x1D, 0x45, 0xB7,
395 0xFF, 0x58, 0x5A, 0xC5, 0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA,
396 0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8, 0x14, 0xCC, 0x5E, 0xD2,
397 0x0F, 0x80, 0x37, 0xE0, 0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28,
398 0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76, 0xF5, 0x50, 0xAA, 0x3D,
399 0x8A, 0x1F, 0xBF, 0xF0, 0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C,
400 0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32, 0x38, 0x7F, 0xE8, 0xD7,
401 0x6E, 0x3C, 0x04, 0x68, 0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE,
402 0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6, 0xE6, 0x94, 0xF9, 0x1E,
403 0x6D, 0xBE, 0x11, 0x59, 0x74, 0xA3, 0x92, 0x6F, 0x12, 0xFE, 0xE5, 0xE4,
404 0x38, 0x77, 0x7C, 0xB6, 0xA9, 0x32, 0xDF, 0x8C, 0xD8, 0xBE, 0xC4, 0xD0,
405 0x73, 0xB9, 0x31, 0xBA, 0x3B, 0xC8, 0x32, 0xB6, 0x8D, 0x9D, 0xD3, 0x00,
406 0x74, 0x1F, 0xA7, 0xBF, 0x8A, 0xFC, 0x47, 0xED, 0x25, 0x76, 0xF6, 0x93,
407 0x6B, 0xA4, 0x24, 0x66, 0x3A, 0xAB, 0x63, 0x9C, 0x5A, 0xE4, 0xF5, 0x68,
408 0x34, 0x23, 0xB4, 0x74, 0x2B, 0xF1, 0xC9, 0x78, 0x23, 0x8F, 0x16, 0xCB,
409 0xE3, 0x9D, 0x65, 0x2D, 0xE3, 0xFD, 0xB8, 0xBE, 0xFC, 0x84, 0x8A, 0xD9,
410 0x22, 0x22, 0x2E, 0x04, 0xA4, 0x03, 0x7C, 0x07, 0x13, 0xEB, 0x57, 0xA8,
411 0x1A, 0x23, 0xF0, 0xC7, 0x34, 0x73, 0xFC, 0x64, 0x6C, 0xEA, 0x30, 0x6B,
412 0x4B, 0xCB, 0xC8, 0x86, 0x2F, 0x83, 0x85, 0xDD, 0xFA, 0x9D, 0x4B, 0x7F,
413 0xA2, 0xC0, 0x87, 0xE8, 0x79, 0x68, 0x33, 0x03, 0xED, 0x5B, 0xDD, 0x3A,
414 0x06, 0x2B, 0x3C, 0xF5, 0xB3, 0xA2, 0x78, 0xA6, 0x6D, 0x2A, 0x13, 0xF8,
415 0x3F, 0x44, 0xF8, 0x2D, 0xDF, 0x31, 0x0E, 0xE0, 0x74, 0xAB, 0x6A, 0x36,
416 0x45, 0x97, 0xE8, 0x99, 0xA0, 0x25, 0x5D, 0xC1, 0x64, 0xF3, 0x1C, 0xC5,
417 0x08, 0x46, 0x85, 0x1D, 0xF9, 0xAB, 0x48, 0x19, 0x5D, 0xED, 0x7E, 0xA1,
418 0xB1, 0xD5, 0x10, 0xBD, 0x7E, 0xE7, 0x4D, 0x73, 0xFA, 0xF3, 0x6B, 0xC3,
419 0x1E, 0xCF, 0xA2, 0x68, 0x35, 0x90, 0x46, 0xF4, 0xEB, 0x87, 0x9F, 0x92,
420 0x40, 0x09, 0x43, 0x8B, 0x48, 0x1C, 0x6C, 0xD7, 0x88, 0x9A, 0x00, 0x2E,
421 0xD5, 0xEE, 0x38, 0x2B, 0xC9, 0x19, 0x0D, 0xA6, 0xFC, 0x02, 0x6E, 0x47,
422 0x95, 0x58, 0xE4, 0x47, 0x56, 0x77, 0xE9, 0xAA, 0x9E, 0x30, 0x50, 0xE2,
423 0x76, 0x56, 0x94, 0xDF, 0xC8, 0x1F, 0x56, 0xE8, 0x80, 0xB9, 0x6E, 0x71,
424 0x60, 0xC9, 0x80, 0xDD, 0x98, 0xED, 0xD3, 0xDF, 0xFF, 0xFF, 0xFF, 0xFF,
425 0xFF, 0xFF, 0xFF, 0xFF,
426};
427
428BIGNUM *
429BN_get_rfc3526_prime_8192(BIGNUM *bn)
430{
431 return BN_bin2bn(RFC3526_PRIME_8192, sizeof(RFC3526_PRIME_8192), bn);
432}
433LCRYPTO_ALIAS(BN_get_rfc3526_prime_8192);
diff --git a/src/lib/libcrypto/bn/bn_convert.c b/src/lib/libcrypto/bn/bn_convert.c
deleted file mode 100644
index 6a6354f44e..0000000000
--- a/src/lib/libcrypto/bn/bn_convert.c
+++ /dev/null
@@ -1,757 +0,0 @@
1/* $OpenBSD: bn_convert.c,v 1.23 2024/11/08 14:18:44 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <ctype.h>
60#include <limits.h>
61#include <stdio.h>
62#include <string.h>
63
64#include <openssl/opensslconf.h>
65
66#include <openssl/bio.h>
67#include <openssl/buffer.h>
68#include <openssl/err.h>
69
70#include "bn_local.h"
71#include "bytestring.h"
72#include "crypto_internal.h"
73
74static int bn_dec2bn_cbs(BIGNUM **bnp, CBS *cbs);
75static int bn_hex2bn_cbs(BIGNUM **bnp, CBS *cbs);
76
77static const char hex_digits[] = "0123456789ABCDEF";
78
79static int
80bn_bn2binpad_internal(const BIGNUM *bn, uint8_t *out, int out_len,
81 int little_endian)
82{
83 uint8_t mask, v;
84 BN_ULONG w;
85 int i, j;
86 int b, n;
87
88 n = BN_num_bytes(bn);
89
90 if (out_len == -1)
91 out_len = n;
92 if (out_len < n)
93 return -1;
94
95 if (bn->dmax == 0) {
96 explicit_bzero(out, out_len);
97 return out_len;
98 }
99
100 mask = 0;
101 b = BN_BITS2;
102 j = 0;
103
104 for (i = out_len - 1; i >= 0; i--) {
105 if (b == BN_BITS2) {
106 mask = crypto_ct_lt_mask(j, bn->top);
107 w = bn->d[j++ % bn->dmax];
108 b = 0;
109 }
110 out[i] = (w >> b) & mask;
111 b += 8;
112 }
113
114 if (little_endian) {
115 for (i = 0, j = out_len - 1; i < out_len / 2; i++, j--) {
116 v = out[i];
117 out[i] = out[j];
118 out[j] = v;
119 }
120 }
121
122 return out_len;
123}
124
125int
126BN_bn2bin(const BIGNUM *bn, unsigned char *to)
127{
128 return bn_bn2binpad_internal(bn, to, -1, 0);
129}
130LCRYPTO_ALIAS(BN_bn2bin);
131
132int
133BN_bn2binpad(const BIGNUM *bn, unsigned char *to, int to_len)
134{
135 if (to_len < 0)
136 return -1;
137
138 return bn_bn2binpad_internal(bn, to, to_len, 0);
139}
140LCRYPTO_ALIAS(BN_bn2binpad);
141
142static int
143bn_bin2bn_cbs(BIGNUM **bnp, CBS *cbs, int lebin)
144{
145 BIGNUM *bn = NULL;
146 BN_ULONG w;
147 uint8_t v;
148 int b, i;
149
150 if ((bn = *bnp) == NULL)
151 bn = BN_new();
152 if (bn == NULL)
153 goto err;
154 if (!bn_expand_bytes(bn, CBS_len(cbs)))
155 goto err;
156
157 b = 0;
158 i = 0;
159 w = 0;
160
161 while (CBS_len(cbs) > 0) {
162 if (lebin) {
163 if (!CBS_get_u8(cbs, &v))
164 goto err;
165 } else {
166 if (!CBS_get_last_u8(cbs, &v))
167 goto err;
168 }
169
170 w |= (BN_ULONG)v << b;
171 b += 8;
172
173 if (b == BN_BITS2 || CBS_len(cbs) == 0) {
174 b = 0;
175 bn->d[i++] = w;
176 w = 0;
177 }
178 }
179
180 bn->neg = 0;
181 bn->top = i;
182
183 bn_correct_top(bn);
184
185 *bnp = bn;
186
187 return 1;
188
189 err:
190 if (*bnp == NULL)
191 BN_free(bn);
192
193 return 0;
194}
195
196BIGNUM *
197BN_bin2bn(const unsigned char *d, int len, BIGNUM *bn)
198{
199 CBS cbs;
200
201 if (len < 0)
202 return NULL;
203
204 CBS_init(&cbs, d, len);
205
206 if (!bn_bin2bn_cbs(&bn, &cbs, 0))
207 return NULL;
208
209 return bn;
210}
211LCRYPTO_ALIAS(BN_bin2bn);
212
213int
214BN_bn2lebinpad(const BIGNUM *bn, unsigned char *to, int to_len)
215{
216 if (to_len < 0)
217 return -1;
218
219 return bn_bn2binpad_internal(bn, to, to_len, 1);
220}
221LCRYPTO_ALIAS(BN_bn2lebinpad);
222
223BIGNUM *
224BN_lebin2bn(const unsigned char *d, int len, BIGNUM *bn)
225{
226 CBS cbs;
227
228 if (len < 0)
229 return NULL;
230
231 CBS_init(&cbs, d, len);
232
233 if (!bn_bin2bn_cbs(&bn, &cbs, 1))
234 return NULL;
235
236 return bn;
237}
238LCRYPTO_ALIAS(BN_lebin2bn);
239
240int
241BN_asc2bn(BIGNUM **bnp, const char *s)
242{
243 CBS cbs, cbs_hex;
244 size_t s_len;
245 uint8_t v;
246 int neg;
247
248 if (bnp != NULL && *bnp != NULL)
249 BN_zero(*bnp);
250
251 if (s == NULL)
252 return 0;
253 if ((s_len = strlen(s)) == 0)
254 return 0;
255
256 CBS_init(&cbs, s, s_len);
257
258 /* Handle negative sign. */
259 if (!CBS_peek_u8(&cbs, &v))
260 return 0;
261 if ((neg = (v == '-'))) {
262 if (!CBS_skip(&cbs, 1))
263 return 0;
264 }
265
266 /* Try parsing as hexadecimal with a 0x prefix. */
267 CBS_dup(&cbs, &cbs_hex);
268 if (!CBS_get_u8(&cbs_hex, &v))
269 goto decimal;
270 if (v != '0')
271 goto decimal;
272 if (!CBS_get_u8(&cbs_hex, &v))
273 goto decimal;
274 if (v != 'X' && v != 'x')
275 goto decimal;
276 if (bn_hex2bn_cbs(bnp, &cbs_hex) == 0)
277 return 0;
278
279 goto done;
280
281 decimal:
282 if (bn_dec2bn_cbs(bnp, &cbs) == 0)
283 return 0;
284
285 done:
286 if (bnp != NULL && *bnp != NULL)
287 BN_set_negative(*bnp, neg);
288
289 return 1;
290}
291LCRYPTO_ALIAS(BN_asc2bn);
292
293char *
294BN_bn2dec(const BIGNUM *bn)
295{
296 int started = 0;
297 BIGNUM *tmp = NULL;
298 uint8_t *data = NULL;
299 size_t data_len = 0;
300 uint8_t *s = NULL;
301 size_t s_len;
302 BN_ULONG v, w;
303 uint8_t c;
304 CBB cbb;
305 CBS cbs;
306 int i;
307
308 if (!CBB_init(&cbb, 0))
309 goto err;
310
311 if ((tmp = BN_dup(bn)) == NULL)
312 goto err;
313
314 /*
315 * Divide the BIGNUM by a large multiple of 10, then break the remainder
316 * into decimal digits. This produces a reversed string of digits,
317 * potentially with leading zeroes.
318 */
319 while (!BN_is_zero(tmp)) {
320 if ((w = BN_div_word(tmp, BN_DEC_CONV)) == -1)
321 goto err;
322 for (i = 0; i < BN_DEC_NUM; i++) {
323 v = w % 10;
324 if (!CBB_add_u8(&cbb, '0' + v))
325 goto err;
326 w /= 10;
327 }
328 }
329 if (!CBB_finish(&cbb, &data, &data_len))
330 goto err;
331
332 if (data_len > SIZE_MAX - 3)
333 goto err;
334 if (!CBB_init(&cbb, data_len + 3))
335 goto err;
336
337 if (BN_is_negative(bn)) {
338 if (!CBB_add_u8(&cbb, '-'))
339 goto err;
340 }
341
342 /* Reverse digits and trim leading zeroes. */
343 CBS_init(&cbs, data, data_len);
344 while (CBS_len(&cbs) > 0) {
345 if (!CBS_get_last_u8(&cbs, &c))
346 goto err;
347 if (!started && c == '0')
348 continue;
349 if (!CBB_add_u8(&cbb, c))
350 goto err;
351 started = 1;
352 }
353
354 if (!started) {
355 if (!CBB_add_u8(&cbb, '0'))
356 goto err;
357 }
358 if (!CBB_add_u8(&cbb, '\0'))
359 goto err;
360 if (!CBB_finish(&cbb, &s, &s_len))
361 goto err;
362
363 err:
364 BN_free(tmp);
365 CBB_cleanup(&cbb);
366 freezero(data, data_len);
367
368 return s;
369}
370LCRYPTO_ALIAS(BN_bn2dec);
371
372static int
373bn_dec2bn_cbs(BIGNUM **bnp, CBS *cbs)
374{
375 CBS cbs_digits;
376 BIGNUM *bn = NULL;
377 int d, neg, num;
378 size_t digits = 0;
379 BN_ULONG w;
380 uint8_t v;
381
382 /* Handle negative sign. */
383 if (!CBS_peek_u8(cbs, &v))
384 goto err;
385 if ((neg = (v == '-'))) {
386 if (!CBS_skip(cbs, 1))
387 goto err;
388 }
389
390 /* Scan to find last decimal digit. */
391 CBS_dup(cbs, &cbs_digits);
392 while (CBS_len(&cbs_digits) > 0) {
393 if (!CBS_get_u8(&cbs_digits, &v))
394 goto err;
395 if (!isdigit(v))
396 break;
397 digits++;
398 }
399 if (digits > INT_MAX / 4)
400 goto err;
401
402 num = digits + neg;
403
404 if (bnp == NULL)
405 return num;
406
407 if ((bn = *bnp) == NULL)
408 bn = BN_new();
409 if (bn == NULL)
410 goto err;
411 if (!bn_expand_bits(bn, digits * 4))
412 goto err;
413
414 if ((d = digits % BN_DEC_NUM) == 0)
415 d = BN_DEC_NUM;
416
417 w = 0;
418
419 /* Work forwards from most significant digit. */
420 while (digits-- > 0) {
421 if (!CBS_get_u8(cbs, &v))
422 goto err;
423
424 if (v < '0' || v > '9')
425 goto err;
426
427 v -= '0';
428 w = w * 10 + v;
429 d--;
430
431 if (d == 0) {
432 if (!BN_mul_word(bn, BN_DEC_CONV))
433 goto err;
434 if (!BN_add_word(bn, w))
435 goto err;
436
437 d = BN_DEC_NUM;
438 w = 0;
439 }
440 }
441
442 bn_correct_top(bn);
443
444 BN_set_negative(bn, neg);
445
446 *bnp = bn;
447
448 return num;
449
450 err:
451 if (bnp != NULL && *bnp == NULL)
452 BN_free(bn);
453
454 return 0;
455}
456
457int
458BN_dec2bn(BIGNUM **bnp, const char *s)
459{
460 size_t s_len;
461 CBS cbs;
462
463 if (bnp != NULL && *bnp != NULL)
464 BN_zero(*bnp);
465
466 if (s == NULL)
467 return 0;
468 if ((s_len = strlen(s)) == 0)
469 return 0;
470
471 CBS_init(&cbs, s, s_len);
472
473 return bn_dec2bn_cbs(bnp, &cbs);
474}
475LCRYPTO_ALIAS(BN_dec2bn);
476
477static int
478bn_bn2hex_internal(const BIGNUM *bn, int include_sign, int nibbles_only,
479 char **out, size_t *out_len)
480{
481 int started = 0;
482 uint8_t *s = NULL;
483 size_t s_len = 0;
484 BN_ULONG v, w;
485 int i, j;
486 CBB cbb;
487 CBS cbs;
488 uint8_t nul;
489 int ret = 0;
490
491 *out = NULL;
492 *out_len = 0;
493
494 if (!CBB_init(&cbb, 0))
495 goto err;
496
497 if (BN_is_negative(bn) && include_sign) {
498 if (!CBB_add_u8(&cbb, '-'))
499 goto err;
500 }
501 if (BN_is_zero(bn)) {
502 if (!CBB_add_u8(&cbb, '0'))
503 goto err;
504 }
505 for (i = bn->top - 1; i >= 0; i--) {
506 w = bn->d[i];
507 for (j = BN_BITS2 - 8; j >= 0; j -= 8) {
508 v = (w >> j) & 0xff;
509 if (!started && v == 0)
510 continue;
511 if (started || !nibbles_only || (v >> 4) != 0) {
512 if (!CBB_add_u8(&cbb, hex_digits[v >> 4]))
513 goto err;
514 }
515 if (!CBB_add_u8(&cbb, hex_digits[v & 0xf]))
516 goto err;
517 started = 1;
518 }
519 }
520 if (!CBB_add_u8(&cbb, '\0'))
521 goto err;
522 if (!CBB_finish(&cbb, &s, &s_len))
523 goto err;
524
525 /* The length of a C string does not include the terminating NUL. */
526 CBS_init(&cbs, s, s_len);
527 if (!CBS_get_last_u8(&cbs, &nul))
528 goto err;
529
530 *out = (char *)CBS_data(&cbs);
531 *out_len = CBS_len(&cbs);
532 s = NULL;
533 s_len = 0;
534
535 ret = 1;
536
537 err:
538 CBB_cleanup(&cbb);
539 freezero(s, s_len);
540
541 return ret;
542}
543
/*
 * Hex-encode bn via bn_bn2hex_internal() with include_sign = 0 and
 * nibbles_only = 0: no '-' is emitted and each byte yields two digits.
 * Returns that helper's result; *out and *out_len are set by it.
 */
544int
545bn_bn2hex_nosign(const BIGNUM *bn, char **out, size_t *out_len)
546{
547 return bn_bn2hex_internal(bn, 0, 0, out, out_len);
548}
549
/*
 * Hex-encode bn via bn_bn2hex_internal() with include_sign = 1 and
 * nibbles_only = 1: a '-' is emitted for negative values and a zero high
 * nibble of the leading byte is dropped.
 * Returns that helper's result; *out and *out_len are set by it.
 */
550int
551bn_bn2hex_nibbles(const BIGNUM *bn, char **out, size_t *out_len)
552{
553 return bn_bn2hex_internal(bn, 1, 1, out, out_len);
554}
555
556char *
557BN_bn2hex(const BIGNUM *bn)
558{
559 char *s;
560 size_t s_len;
561
562 if (!bn_bn2hex_internal(bn, 1, 0, &s, &s_len))
563 return NULL;
564
565 return s;
566}
567LCRYPTO_ALIAS(BN_bn2hex);
568
569static int
570bn_hex2bn_cbs(BIGNUM **bnp, CBS *cbs)
571{
572 CBS cbs_digits;
573 BIGNUM *bn = NULL;
574 int b, i, neg, num;
575 size_t digits = 0;
576 BN_ULONG w;
577 uint8_t v;
578
579 /* Handle negative sign. */
580 if (!CBS_peek_u8(cbs, &v))
581 goto err;
582 if ((neg = (v == '-'))) {
583 if (!CBS_skip(cbs, 1))
584 goto err;
585 }
586
587 /* Scan to find last hexadecimal digit. */
588 CBS_dup(cbs, &cbs_digits);
589 while (CBS_len(&cbs_digits) > 0) {
590 if (!CBS_get_u8(&cbs_digits, &v))
591 goto err;
592 if (!isxdigit(v))
593 break;
594 digits++;
595 }
596 if (digits > INT_MAX / 4)
597 goto err;
598
599 num = digits + neg;
600
601 if (bnp == NULL)
602 return num;
603
604 if ((bn = *bnp) == NULL)
605 bn = BN_new();
606 if (bn == NULL)
607 goto err;
608 if (!bn_expand_bits(bn, digits * 4))
609 goto err;
610
611 if (!CBS_get_bytes(cbs, cbs, digits))
612 goto err;
613
614 b = 0;
615 i = 0;
616 w = 0;
617
618 /* Work backwards from least significant digit. */
619 while (digits-- > 0) {
620 if (!CBS_get_last_u8(cbs, &v))
621 goto err;
622
623 if (v >= '0' && v <= '9')
624 v -= '0';
625 else if (v >= 'a' && v <= 'f')
626 v -= 'a' - 10;
627 else if (v >= 'A' && v <= 'F')
628 v -= 'A' - 10;
629 else
630 goto err;
631
632 w |= (BN_ULONG)v << b;
633 b += 4;
634
635 if (b == BN_BITS2 || digits == 0) {
636 b = 0;
637 bn->d[i++] = w;
638 w = 0;
639 }
640 }
641
642 bn->top = i;
643 bn_correct_top(bn);
644
645 BN_set_negative(bn, neg);
646
647 *bnp = bn;
648
649 return num;
650
651 err:
652 if (bnp != NULL && *bnp == NULL)
653 BN_free(bn);
654
655 return 0;
656}
657
658int
659BN_hex2bn(BIGNUM **bnp, const char *s)
660{
661 size_t s_len;
662 CBS cbs;
663
664 if (bnp != NULL && *bnp != NULL)
665 BN_zero(*bnp);
666
667 if (s == NULL)
668 return 0;
669 if ((s_len = strlen(s)) == 0)
670 return 0;
671
672 CBS_init(&cbs, s, s_len);
673
674 return bn_hex2bn_cbs(bnp, &cbs);
675}
676LCRYPTO_ALIAS(BN_hex2bn);
677
678int
679BN_bn2mpi(const BIGNUM *bn, unsigned char *d)
680{
681 uint8_t *out_bin;
682 size_t out_len, out_bin_len;
683 int bits, bytes;
684 int extend;
685 CBB cbb, cbb_bin;
686
687 bits = BN_num_bits(bn);
688 bytes = (bits + 7) / 8;
689 extend = (bits != 0) && (bits % 8 == 0);
690 out_bin_len = extend + bytes;
691 out_len = 4 + out_bin_len;
692
693 if (d == NULL)
694 return out_len;
695
696 if (!CBB_init_fixed(&cbb, d, out_len))
697 goto err;
698 if (!CBB_add_u32_length_prefixed(&cbb, &cbb_bin))
699 goto err;
700 if (!CBB_add_space(&cbb_bin, &out_bin, out_bin_len))
701 goto err;
702 if (BN_bn2binpad(bn, out_bin, out_bin_len) != out_bin_len)
703 goto err;
704 if (!CBB_finish(&cbb, NULL, NULL))
705 goto err;
706
707 if (bn->neg)
708 d[4] |= 0x80;
709
710 return out_len;
711
712 err:
713 CBB_cleanup(&cbb);
714
715 return -1;
716}
717LCRYPTO_ALIAS(BN_bn2mpi);
718
719BIGNUM *
720BN_mpi2bn(const unsigned char *d, int n, BIGNUM *bn_in)
721{
722 BIGNUM *bn = bn_in;
723 uint32_t mpi_len;
724 uint8_t v;
725 int neg = 0;
726 CBS cbs;
727
728 if (n < 0)
729 return NULL;
730
731 CBS_init(&cbs, d, n);
732
733 if (!CBS_get_u32(&cbs, &mpi_len)) {
734 BNerror(BN_R_INVALID_LENGTH);
735 return NULL;
736 }
737 if (CBS_len(&cbs) != mpi_len) {
738 BNerror(BN_R_ENCODING_ERROR);
739 return NULL;
740 }
741 if (CBS_len(&cbs) > 0) {
742 if (!CBS_peek_u8(&cbs, &v))
743 return NULL;
744 neg = (v >> 7) & 1;
745 }
746
747 if (!bn_bin2bn_cbs(&bn, &cbs, 0))
748 return NULL;
749
750 if (neg)
751 BN_clear_bit(bn, BN_num_bits(bn) - 1);
752
753 BN_set_negative(bn, neg);
754
755 return bn;
756}
757LCRYPTO_ALIAS(BN_mpi2bn);
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c
deleted file mode 100644
index 129b9c9781..0000000000
--- a/src/lib/libcrypto/bn/bn_ctx.c
+++ /dev/null
@@ -1,161 +0,0 @@
1/* $OpenBSD: bn_ctx.c,v 1.22 2023/07/08 12:21:58 beck Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <stddef.h>
19#include <string.h>
20
21#include <openssl/opensslconf.h>
22#include <openssl/err.h>
23
24#include "bn_local.h"
25
26#define BN_CTX_INITIAL_LEN 8
27
/*
 * Pool of temporary BIGNUMs that are handed out by BN_CTX_get() and released
 * one group at a time by BN_CTX_end().
 */
28struct bignum_ctx {
 /* Array of len BIGNUM pointers; entries are allocated on first use. */
29 BIGNUM **bignums;
 /* Parallel array recording the group each handed-out slot belongs to. */
30 uint8_t *groups;
 /* Current group, incremented by BN_CTX_start(); 0 means none is open. */
31 uint8_t group;
 /* Index of the next slot BN_CTX_get() will hand out. */
32 size_t index;
 /* Number of slots in the bignums and groups arrays. */
33 size_t len;
34
 /* Sticky failure flag; once set, BN_CTX_get() returns NULL. */
35 int error;
36};
37
38static int
39bn_ctx_grow(BN_CTX *bctx)
40{
41 BIGNUM **bignums = NULL;
42 uint8_t *groups = NULL;
43 size_t len;
44
45 if ((len = bctx->len) == 0) {
46 len = BN_CTX_INITIAL_LEN;
47 } else {
48 if (SIZE_MAX - len < len)
49 return 0;
50 len *= 2;
51 }
52
53 if ((bignums = recallocarray(bctx->bignums, bctx->len, len,
54 sizeof(bctx->bignums[0]))) == NULL)
55 return 0;
56 bctx->bignums = bignums;
57
58 if ((groups = reallocarray(bctx->groups, len,
59 sizeof(bctx->groups[0]))) == NULL)
60 return 0;
61 bctx->groups = groups;
62
63 bctx->len = len;
64
65 return 1;
66}
67
68BN_CTX *
69BN_CTX_new(void)
70{
71 return calloc(1, sizeof(struct bignum_ctx));
72}
73LCRYPTO_ALIAS(BN_CTX_new);
74
75void
76BN_CTX_free(BN_CTX *bctx)
77{
78 size_t i;
79
80 if (bctx == NULL)
81 return;
82
83 for (i = 0; i < bctx->len; i++) {
84 BN_free(bctx->bignums[i]);
85 bctx->bignums[i] = NULL;
86 }
87
88 free(bctx->bignums);
89 free(bctx->groups);
90
91 freezero(bctx, sizeof(*bctx));
92}
93LCRYPTO_ALIAS(BN_CTX_free);
94
95void
96BN_CTX_start(BN_CTX *bctx)
97{
98 bctx->group++;
99
100 if (bctx->group == 0) {
101 BNerror(BN_R_TOO_MANY_TEMPORARY_VARIABLES);
102 bctx->error = 1;
103 return;
104 }
105}
106LCRYPTO_ALIAS(BN_CTX_start);
107
108BIGNUM *
109BN_CTX_get(BN_CTX *bctx)
110{
111 BIGNUM *bn = NULL;
112
113 if (bctx->error)
114 return NULL;
115
116 if (bctx->group == 0) {
117 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
118 bctx->error = 1;
119 return NULL;
120 }
121
122 if (bctx->index == bctx->len) {
123 if (!bn_ctx_grow(bctx)) {
124 BNerror(BN_R_TOO_MANY_TEMPORARY_VARIABLES);
125 bctx->error = 1;
126 return NULL;
127 }
128 }
129
130 if ((bn = bctx->bignums[bctx->index]) == NULL) {
131 if ((bn = BN_new()) == NULL) {
132 BNerror(BN_R_TOO_MANY_TEMPORARY_VARIABLES);
133 bctx->error = 1;
134 return NULL;
135 }
136 bctx->bignums[bctx->index] = bn;
137 }
138 bctx->groups[bctx->index] = bctx->group;
139 bctx->index++;
140
141 BN_zero(bn);
142
143 return bn;
144}
145LCRYPTO_ALIAS(BN_CTX_get);
146
147void
148BN_CTX_end(BN_CTX *bctx)
149{
150 if (bctx == NULL || bctx->error || bctx->group == 0)
151 return;
152
153 while (bctx->index > 0 && bctx->groups[bctx->index - 1] == bctx->group) {
154 BN_zero(bctx->bignums[bctx->index - 1]);
155 bctx->groups[bctx->index - 1] = 0;
156 bctx->index--;
157 }
158
159 bctx->group--;
160}
161LCRYPTO_ALIAS(BN_CTX_end);
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
deleted file mode 100644
index 09a8a364df..0000000000
--- a/src/lib/libcrypto/bn/bn_div.c
+++ /dev/null
@@ -1,458 +0,0 @@
1/* $OpenBSD: bn_div.c,v 1.41 2024/04/10 14:58:06 beck Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <assert.h>
60#include <stdio.h>
61
62#include <openssl/opensslconf.h>
63
64#include <openssl/bn.h>
65#include <openssl/err.h>
66
67#include "bn_arch.h"
68#include "bn_local.h"
69#include "bn_internal.h"
70
71BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0);
72
/*
 * Fallback implementations of bn_div_words(), compiled only when the
 * architecture does not define HAVE_BN_DIV_WORDS. Both divide the double
 * word h:l by d and return a single-word quotient.
 */
73#ifndef HAVE_BN_DIV_WORDS
74#if defined(BN_LLONG) && defined(BN_DIV2W)
75
/* Variant using a double-width BN_ULLONG to do the division directly. */
76BN_ULONG
77bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
78{
79 return ((BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2)|l)/(BN_ULLONG)d));
80}
81
82#else
83
84/* Divide h,l by d and return the result. */
85/* I need to test this some more :-( */
/*
 * Variant using only single-word arithmetic. The quotient is produced as two
 * half-word digits, one per pass of the outer loop.
 */
86BN_ULONG
87bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
88{
89 BN_ULONG dh, dl, q,ret = 0, th, tl, t;
90 int i, count = 2;
91
 /* Division by zero yields an all-ones word rather than trapping. */
92 if (d == 0)
93 return (BN_MASK2);
94
95 i = BN_num_bits_word(d);
96 assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
97
 /* i becomes the left shift that moves d's top set bit to the word's top. */
98 i = BN_BITS2 - i;
99 if (h >= d)
100 h -= d;
101
 /* Shift d and the numerator h:l left by the same amount. */
102 if (i) {
103 d <<= i;
104 h = (h << i) | (l >> (BN_BITS2 - i));
105 l <<= i;
106 }
 /* Split the shifted divisor into its high and low halves. */
107 dh = (d & BN_MASK2h) >> BN_BITS4;
108 dl = (d & BN_MASK2l);
109 for (;;) {
 /* Estimate a half-word quotient digit from the high halves. */
110 if ((h >> BN_BITS4) == dh)
111 q = BN_MASK2l;
112 else
113 q = h / dh;
114
115 th = q * dh;
116 tl = dl * q;
 /* Lower q until the test below is satisfied. */
117 for (;;) {
118 t = h - th;
119 if ((t & BN_MASK2h) ||
120 ((tl) <= (
121 (t << BN_BITS4) |
122 ((l & BN_MASK2h) >> BN_BITS4))))
123 break;
124 q--;
125 th -= dh;
126 tl -= dl;
127 }
 /* Subtract q * d from h:l, with one final correction of q. */
128 t = (tl >> BN_BITS4);
129 tl = (tl << BN_BITS4) & BN_MASK2h;
130 th += t;
131
132 if (l < tl)
133 th++;
134 l -= tl;
135 if (h < th) {
136 h += d;
137 q--;
138 }
139 h -= th;
140
 /* The second pass has produced the low digit; stop. */
141 if (--count == 0)
142 break;
143
 /* Store the high digit and shift the remainder up half a word. */
144 ret = q << BN_BITS4;
145 h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
146 l = (l & BN_MASK2l) << BN_BITS4;
147 }
148 ret |= q;
149 return (ret);
150}
151#endif /* defined(BN_LLONG) && defined(BN_DIV2W) */
152#endif /* !HAVE_BN_DIV_WORDS */
153
154/*
155 * Divide a double word (h:l) by d, returning the quotient q and the remainder
156 * r, such that q * d + r is equal to the numerator.
157 */
158#ifndef HAVE_BN_DIV_REM_WORDS
159#ifndef HAVE_BN_DIV_REM_WORDS_INLINE
160static inline void
161bn_div_rem_words_inline(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q,
162 BN_ULONG *out_r)
163{
164 BN_ULONG q, r;
165
166 q = bn_div_words(h, l, d);
167 r = (l - q * d) & BN_MASK2;
168
169 *out_q = q;
170 *out_r = r;
171}
172#endif
173
/*
 * Out-of-line entry point that forwards to bn_div_rem_words_inline(), storing
 * the quotient in *out_q and the remainder in *out_r.
 */
174void
175bn_div_rem_words(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q,
176 BN_ULONG *out_r)
177{
178 bn_div_rem_words_inline(h, l, d, out_q, out_r);
179}
180#endif
181
182#ifndef HAVE_BN_DIV_3_WORDS
183
184/*
185 * Interface is somewhat quirky, |m| is pointer to most significant limb,
186 * and less significant limb is referred at |m[-1]|. This means that caller
187 * is responsible for ensuring that |m[-1]| is valid. Second condition that
188 * has to be met is that |d0|'s most significant bit has to be set. Or in
189 * other words divisor has to be "bit-aligned to the left." The subroutine
190 * considers four limbs, two of which are "overlapping," hence the name...
191 */
/*
 * Estimate one quotient word. The initial estimate is (m[0]:m[-1]) / d0; it
 * is then lowered while d1 * q exceeds (rem:m[-2]). Returns BN_MASK2 when
 * m[0] equals d0. Note that m[-2] is read as well as m[0] and m[-1], so all
 * three words must be valid.
 */
192BN_ULONG
193bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0)
194{
195 BN_ULONG n0, n1, q, t2h, t2l;
196 BN_ULONG rem = 0;
197
198 n0 = m[0];
199 n1 = m[-1];
200
 /* Skip the division and return the largest possible word. */
201 if (n0 == d0)
202 return BN_MASK2;
203
204 /* n0 < d0 */
205 bn_div_rem_words(n0, n1, d0, &q, &rem);
206
 /* t2h:t2l = d1 * q, to be compared against rem:m[-2]. */
207 bn_mulw(d1, q, &t2h, &t2l);
208
209 for (;;) {
 /* Done once d1 * q no longer exceeds rem:m[-2]. */
210 if (t2h < rem || (t2h == rem && t2l <= m[-2]))
211 break;
 /* Lower q by one: rem grows by d0 and t2h:t2l shrinks by d1. */
212 q--;
213 rem += d0;
214 if (rem < d0)
215 break; /* don't let rem overflow */
 /* Borrow from the high word when the low word underflows. */
216 if (t2l < d1)
217 t2h--;
218 t2l -= d1;
219 }
220
221 return q;
222}
223#endif /* !HAVE_BN_DIV_3_WORDS */
224
225/*
226 * BN_div_internal computes quotient := numerator / divisor, rounding towards
227 * zero and setting remainder such that quotient * divisor + remainder equals
228 * the numerator. Thus:
229 *
230 * quotient->neg == numerator->neg ^ divisor->neg (unless result is zero)
231 * remainder->neg == numerator->neg (unless the remainder is zero)
232 *
233 * If either the quotient or remainder is NULL, the respective value is not
234 * returned.
235 */
236static int
237BN_div_internal(BIGNUM *quotient, BIGNUM *remainder, const BIGNUM *numerator,
238 const BIGNUM *divisor, BN_CTX *ctx, int ct)
239{
240 int norm_shift, i, loop, r_neg;
241 BIGNUM *tmp, wnum, *snum, *sdiv, *res;
242 BN_ULONG *resp, *wnump;
243 BN_ULONG d0, d1;
244 int num_n, div_n;
245 int no_branch = 0;
246 int ret = 0;
247
248 BN_CTX_start(ctx);
249
250 /* Invalid zero-padding would have particularly bad consequences. */
251 if (numerator->top > 0 && numerator->d[numerator->top - 1] == 0) {
252 BNerror(BN_R_NOT_INITIALIZED);
253 goto err;
254 }
255
256 if (ct)
257 no_branch = 1;
258
259 if (BN_is_zero(divisor)) {
260 BNerror(BN_R_DIV_BY_ZERO);
261 goto err;
262 }
263
264 if (!no_branch) {
265 if (BN_ucmp(numerator, divisor) < 0) {
266 if (remainder != NULL) {
267 if (!bn_copy(remainder, numerator))
268 goto err;
269 }
270 if (quotient != NULL)
271 BN_zero(quotient);
272
273 goto done;
274 }
275 }
276
277 if ((tmp = BN_CTX_get(ctx)) == NULL)
278 goto err;
279 if ((snum = BN_CTX_get(ctx)) == NULL)
280 goto err;
281 if ((sdiv = BN_CTX_get(ctx)) == NULL)
282 goto err;
283 if ((res = quotient) == NULL) {
284 if ((res = BN_CTX_get(ctx)) == NULL)
285 goto err;
286 }
287
288 /* First we normalise the numbers. */
289 norm_shift = BN_BITS2 - BN_num_bits(divisor) % BN_BITS2;
290 if (!BN_lshift(sdiv, divisor, norm_shift))
291 goto err;
292 sdiv->neg = 0;
293 norm_shift += BN_BITS2;
294 if (!BN_lshift(snum, numerator, norm_shift))
295 goto err;
296 snum->neg = 0;
297
298 if (no_branch) {
299 /*
300 * Since we don't know whether snum is larger than sdiv, we pad
301 * snum with enough zeroes without changing its value.
302 */
303 if (snum->top <= sdiv->top + 1) {
304 if (!bn_wexpand(snum, sdiv->top + 2))
305 goto err;
306 for (i = snum->top; i < sdiv->top + 2; i++)
307 snum->d[i] = 0;
308 snum->top = sdiv->top + 2;
309 } else {
310 if (!bn_wexpand(snum, snum->top + 1))
311 goto err;
312 snum->d[snum->top] = 0;
313 snum->top++;
314 }
315 }
316
317 div_n = sdiv->top;
318 num_n = snum->top;
319 loop = num_n - div_n;
320
321 /*
322 * Setup a 'window' into snum - this is the part that corresponds to the
323 * current 'area' being divided.
324 */
325 wnum.neg = 0;
326 wnum.d = &(snum->d[loop]);
327 wnum.top = div_n;
328 /* only needed when BN_ucmp messes up the values between top and max */
329 wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
330 wnum.flags = snum->flags | BN_FLG_STATIC_DATA;
331
332 /* Get the top 2 words of sdiv */
333 /* div_n=sdiv->top; */
334 d0 = sdiv->d[div_n - 1];
335 d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];
336
337 /* pointer to the 'top' of snum */
338 wnump = &(snum->d[num_n - 1]);
339
340 /* Setup to 'res' */
341 if (!bn_wexpand(res, (loop + 1)))
342 goto err;
343 res->top = loop - no_branch;
344 r_neg = numerator->neg ^ divisor->neg;
345 resp = &(res->d[loop - 1]);
346
347 /* space for temp */
348 if (!bn_wexpand(tmp, (div_n + 1)))
349 goto err;
350
351 if (!no_branch) {
352 if (BN_ucmp(&wnum, sdiv) >= 0) {
353 bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
354 *resp = 1;
355 } else
356 res->top--;
357 }
358
359 /*
360 * If res->top == 0 then clear the neg value otherwise decrease the resp
361 * pointer.
362 */
363 if (res->top == 0)
364 res->neg = 0;
365 else
366 resp--;
367
368 for (i = 0; i < loop - 1; i++, wnump--, resp--) {
369 BN_ULONG q, l0;
370
371 /*
372 * The first part of the loop uses the top two words of snum and
373 * sdiv to calculate a BN_ULONG q such that:
374 *
375 * | wnum - sdiv * q | < sdiv
376 */
377 q = bn_div_3_words(wnump, d1, d0);
378 l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q);
379 tmp->d[div_n] = l0;
380 wnum.d--;
381
382 /*
383 * Ignore top values of the bignums just sub the two BN_ULONG
384 * arrays with bn_sub_words.
385 */
386 if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n + 1)) {
387 /*
388 * Note: As we have considered only the leading two
389 * BN_ULONGs in the calculation of q, sdiv * q might be
390 * greater than wnum (but then (q-1) * sdiv is less or
391 * equal than wnum).
392 */
393 q--;
394 if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n)) {
395 /*
396 * We can't have an overflow here (assuming
397 * that q != 0, but if q == 0 then tmp is
398 * zero anyway).
399 */
400 (*wnump)++;
401 }
402 }
403 /* store part of the result */
404 *resp = q;
405 }
406
407 bn_correct_top(snum);
408
409 if (remainder != NULL) {
410 /*
411 * Keep a copy of the neg flag in numerator because if
412 * remainder == numerator, BN_rshift() will overwrite it.
413 */
414 int neg = numerator->neg;
415
416 BN_rshift(remainder, snum, norm_shift);
417 BN_set_negative(remainder, neg);
418 }
419
420 if (no_branch)
421 bn_correct_top(res);
422
423 BN_set_negative(res, r_neg);
424
425 done:
426 ret = 1;
427 err:
428 BN_CTX_end(ctx);
429
430 return ret;
431}
432
433int
434BN_div(BIGNUM *quotient, BIGNUM *remainder, const BIGNUM *numerator,
435 const BIGNUM *divisor, BN_CTX *ctx)
436{
437 int ct;
438
439 ct = BN_get_flags(numerator, BN_FLG_CONSTTIME) != 0 ||
440 BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0;
441
442 return BN_div_internal(quotient, remainder, numerator, divisor, ctx, ct);
443}
444LCRYPTO_ALIAS(BN_div);
445
/*
 * Divide via BN_div_internal() with ct = 0, regardless of any flags set on
 * the operands. Returns that function's result.
 */
446int
447BN_div_nonct(BIGNUM *quotient, BIGNUM *remainder, const BIGNUM *numerator,
448 const BIGNUM *divisor, BN_CTX *ctx)
449{
450 return BN_div_internal(quotient, remainder, numerator, divisor, ctx, 0);
451}
452
/*
 * Divide via BN_div_internal() with ct = 1, regardless of any flags set on
 * the operands. Returns that function's result.
 */
453int
454BN_div_ct(BIGNUM *quotient, BIGNUM *remainder, const BIGNUM *numerator,
455 const BIGNUM *divisor, BN_CTX *ctx)
456{
457 return BN_div_internal(quotient, remainder, numerator, divisor, ctx, 1);
458}
diff --git a/src/lib/libcrypto/bn/bn_err.c b/src/lib/libcrypto/bn/bn_err.c
deleted file mode 100644
index 3ee6b4311f..0000000000
--- a/src/lib/libcrypto/bn/bn_err.c
+++ /dev/null
@@ -1,110 +0,0 @@
1/* $OpenBSD: bn_err.c,v 1.18 2024/06/24 06:43:22 tb Exp $ */
2/* ====================================================================
3 * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@OpenSSL.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56#include <stdio.h>
57
58#include <openssl/opensslconf.h>
59
60#include <openssl/err.h>
61#include <openssl/bn.h>
62
63#include "err_local.h"
64
65#ifndef OPENSSL_NO_ERR
66
/* Pack a function or reason code together with the BN library identifier. */
67#define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0)
68#define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason)
69
/* Function name table: a single generic entry, terminated by {0, NULL}. */
70static const ERR_STRING_DATA BN_str_functs[] = {
71 {ERR_FUNC(0xfff), "CRYPTO_internal"},
72 {0, NULL}
73};
74
/* Text for each BN_R_* reason code, terminated by {0, NULL}. */
75static const ERR_STRING_DATA BN_str_reasons[] = {
76 {ERR_REASON(BN_R_ARG2_LT_ARG3) , "arg2 lt arg3"},
77 {ERR_REASON(BN_R_BAD_RECIPROCAL) , "bad reciprocal"},
78 {ERR_REASON(BN_R_BIGNUM_TOO_LONG) , "bignum too long"},
79 {ERR_REASON(BN_R_BITS_TOO_SMALL) , "bits too small"},
80 {ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS), "called with even modulus"},
81 {ERR_REASON(BN_R_DIV_BY_ZERO) , "div by zero"},
82 {ERR_REASON(BN_R_ENCODING_ERROR) , "encoding error"},
83 {ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA), "expand on static bignum data"},
84 {ERR_REASON(BN_R_INPUT_NOT_REDUCED) , "input not reduced"},
85 {ERR_REASON(BN_R_INVALID_ARGUMENT) , "invalid argument"},
86 {ERR_REASON(BN_R_INVALID_LENGTH) , "invalid length"},
87 {ERR_REASON(BN_R_INVALID_RANGE) , "invalid range"},
88 {ERR_REASON(BN_R_NOT_A_SQUARE) , "not a square"},
89 {ERR_REASON(BN_R_NOT_INITIALIZED) , "not initialized"},
90 {ERR_REASON(BN_R_NO_INVERSE) , "no inverse"},
91 {ERR_REASON(BN_R_NO_SOLUTION) , "no solution"},
92 {ERR_REASON(BN_R_P_IS_NOT_PRIME) , "p is not prime"},
93 {ERR_REASON(BN_R_TOO_MANY_ITERATIONS) , "too many iterations"},
94 {ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES), "too many temporary variables"},
95 {0, NULL}
96};
97
98#endif
99
100void
101ERR_load_BN_strings(void)
102{
103#ifndef OPENSSL_NO_ERR
104 if (ERR_func_error_string(BN_str_functs[0].error) == NULL) {
105 ERR_load_const_strings(BN_str_functs);
106 ERR_load_const_strings(BN_str_reasons);
107 }
108#endif
109}
110LCRYPTO_ALIAS(ERR_load_BN_strings);
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
deleted file mode 100644
index e925d325d2..0000000000
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ /dev/null
@@ -1,1330 +0,0 @@
1/* $OpenBSD: bn_exp.c,v 1.58 2025/02/13 11:15:09 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdlib.h>
113#include <string.h>
114
115#include <openssl/err.h>
116
117#include "bn_local.h"
118#include "constant_time.h"
119
120/* maximum precomputation table size for *variable* sliding windows */
121#define TABLE_SIZE 32
122
123/* Calculates r = a^p by successive squaring of a. Not constant time. */
124int
125BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
126{
127 BIGNUM *rr, *v;
128 int i;
129 int ret = 0;
130
131 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
132 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
133 return -1;
134 }
135
136 BN_CTX_start(ctx);
137
138 if ((v = BN_CTX_get(ctx)) == NULL)
139 goto err;
140
141 rr = r;
142 if (r == a || r == p)
143 rr = BN_CTX_get(ctx);
144 if (rr == NULL)
145 goto err;
146
147 if (!BN_one(rr))
148 goto err;
149 if (BN_is_odd(p)) {
150 if (!bn_copy(rr, a))
151 goto err;
152 }
153
154 if (!bn_copy(v, a))
155 goto err;
156
157 for (i = 1; i < BN_num_bits(p); i++) {
158 if (!BN_sqr(v, v, ctx))
159 goto err;
160 if (!BN_is_bit_set(p, i))
161 continue;
162 if (!BN_mul(rr, rr, v, ctx))
163 goto err;
164 }
165
166 if (!bn_copy(r, rr))
167 goto err;
168
169 ret = 1;
170
171 err:
172 BN_CTX_end(ctx);
173
174 return ret;
175}
176LCRYPTO_ALIAS(BN_exp);
177
178/* The old fallback, simple version :-) */
179int
180BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
181 BN_CTX *ctx)
182{
183 int i, j, bits, wstart, wend, window, wvalue;
184 int start = 1;
185 BIGNUM *d, *q;
186 /* Table of variables obtained from 'ctx' */
187 BIGNUM *val[TABLE_SIZE];
188 int ret = 0;
189
190 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
191 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
192 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
193 return -1;
194 }
195
196 if (r == m) {
197 BNerror(BN_R_INVALID_ARGUMENT);
198 return 0;
199 }
200
201 bits = BN_num_bits(p);
202 if (bits == 0) {
203 /* x**0 mod 1 is still zero. */
204 if (BN_abs_is_word(m, 1)) {
205 ret = 1;
206 BN_zero(r);
207 } else
208 ret = BN_one(r);
209 return ret;
210 }
211
212 BN_CTX_start(ctx);
213 if ((d = BN_CTX_get(ctx)) == NULL)
214 goto err;
215 if ((q = BN_CTX_get(ctx)) == NULL)
216 goto err;
217 if ((val[0] = BN_CTX_get(ctx)) == NULL)
218 goto err;
219
220 if (!BN_nnmod(val[0], a, m, ctx))
221 goto err;
222 if (BN_is_zero(val[0])) {
223 BN_zero(r);
224 goto done;
225 }
226 if (!bn_copy(q, p))
227 goto err;
228
229 window = BN_window_bits_for_exponent_size(bits);
230 if (window > 1) {
231 if (!BN_mod_mul(d, val[0], val[0], m, ctx))
232 goto err;
233 j = 1 << (window - 1);
234 for (i = 1; i < j; i++) {
235 if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
236 !BN_mod_mul(val[i], val[i - 1], d,m, ctx))
237 goto err;
238 }
239 }
240
241 start = 1; /* This is used to avoid multiplication etc
242 * when there is only the value '1' in the
243 * buffer. */
244 wvalue = 0; /* The 'value' of the window */
245 wstart = bits - 1; /* The top bit of the window */
246 wend = 0; /* The bottom bit of the window */
247
248 if (!BN_one(r))
249 goto err;
250
251 for (;;) {
252 if (BN_is_bit_set(q, wstart) == 0) {
253 if (!start)
254 if (!BN_mod_mul(r, r, r, m, ctx))
255 goto err;
256 if (wstart == 0)
257 break;
258 wstart--;
259 continue;
260 }
261 /* We now have wstart on a 'set' bit, we now need to work out
262 * how bit a window to do. To do this we need to scan
263 * forward until the last set bit before the end of the
264 * window */
265 j = wstart;
266 wvalue = 1;
267 wend = 0;
268 for (i = 1; i < window; i++) {
269 if (wstart - i < 0)
270 break;
271 if (BN_is_bit_set(q, wstart - i)) {
272 wvalue <<= (i - wend);
273 wvalue |= 1;
274 wend = i;
275 }
276 }
277
278 /* wend is the size of the current window */
279 j = wend + 1;
280 /* add the 'bytes above' */
281 if (!start)
282 for (i = 0; i < j; i++) {
283 if (!BN_mod_mul(r, r, r, m, ctx))
284 goto err;
285 }
286
287 /* wvalue will be an odd number < 2^window */
288 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
289 goto err;
290
291 /* move the 'window' down further */
292 wstart -= wend + 1;
293 wvalue = 0;
294 start = 0;
295 if (wstart < 0)
296 break;
297 }
298
299 done:
300 ret = 1;
301
302 err:
303 BN_CTX_end(ctx);
304
305 return ret;
306}
307
308/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
309 * so that accessing any of these table values shows the same access pattern as far
310 * as cache lines are concerned. The following functions are used to transfer a BIGNUM
311 * from/to that table. */
312
313static int
314MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf,
315 int idx, int window)
316{
317 int i, j;
318 int width = 1 << window;
319 BN_ULONG *table = (BN_ULONG *)buf;
320
321 if (top > b->top)
322 top = b->top; /* this works because 'buf' is explicitly zeroed */
323
324 for (i = 0, j = idx; i < top; i++, j += width) {
325 table[j] = b->d[i];
326 }
327
328 return 1;
329}
330
331static int
332MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx,
333 int window)
334{
335 int i, j;
336 int width = 1 << window;
337 volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
338
339 if (!bn_wexpand(b, top))
340 return 0;
341
342 if (window <= 3) {
343 for (i = 0; i < top; i++, table += width) {
344 BN_ULONG acc = 0;
345
346 for (j = 0; j < width; j++) {
347 acc |= table[j] &
348 ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
349 }
350
351 b->d[i] = acc;
352 }
353 } else {
354 int xstride = 1 << (window - 2);
355 BN_ULONG y0, y1, y2, y3;
356
357 i = idx >> (window - 2); /* equivalent of idx / xstride */
358 idx &= xstride - 1; /* equivalent of idx % xstride */
359
360 y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
361 y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
362 y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
363 y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
364
365 for (i = 0; i < top; i++, table += width) {
366 BN_ULONG acc = 0;
367
368 for (j = 0; j < xstride; j++) {
369 acc |= ( (table[j + 0 * xstride] & y0) |
370 (table[j + 1 * xstride] & y1) |
371 (table[j + 2 * xstride] & y2) |
372 (table[j + 3 * xstride] & y3) )
373 & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
374 }
375
376 b->d[i] = acc;
377 }
378 }
379 b->top = top;
380 bn_correct_top(b);
381 return 1;
382}
383
384/* Given a pointer value, compute the next address that is a cache line multiple. */
385#define MOD_EXP_CTIME_ALIGN(x_) \
386 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
387
388/* This variant of BN_mod_exp_mont() uses fixed windows and the special
389 * precomputation memory layout to limit data-dependency to a minimum
390 * to protect secret exponents (cf. the hyper-threading timing attacks
391 * pointed out by Colin Percival,
392 * http://www.daemonology.net/hyperthreading-considered-harmful/)
393 */
394int
395BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
396 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
397{
398 int i, bits, ret = 0, window, wvalue;
399 int top;
400 BN_MONT_CTX *mont = NULL;
401 int numPowers;
402 unsigned char *powerbufFree = NULL;
403 int powerbufLen = 0;
404 unsigned char *powerbuf = NULL;
405 BIGNUM tmp, am;
406
407
408 if (!BN_is_odd(m)) {
409 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
410 return (0);
411 }
412
413 top = m->top;
414
415 bits = BN_num_bits(p);
416 if (bits == 0) {
417 /* x**0 mod 1 is still zero. */
418 if (BN_abs_is_word(m, 1)) {
419 ret = 1;
420 BN_zero(rr);
421 } else
422 ret = BN_one(rr);
423 return ret;
424 }
425
426 BN_CTX_start(ctx);
427
428 if ((mont = in_mont) == NULL)
429 mont = BN_MONT_CTX_create(m, ctx);
430 if (mont == NULL)
431 goto err;
432
433 /* Get the window size to use with size of p. */
434 window = BN_window_bits_for_ctime_exponent_size(bits);
435#if defined(OPENSSL_BN_ASM_MONT5)
436 if (window == 6 && bits <= 1024)
437 window = 5; /* ~5% improvement of 2048-bit RSA sign */
438#endif
439
440 /* Allocate a buffer large enough to hold all of the pre-computed
441 * powers of am, am itself and tmp.
442 */
443 numPowers = 1 << window;
444 powerbufLen = sizeof(m->d[0]) * (top * numPowers +
445 ((2*top) > numPowers ? (2*top) : numPowers));
446 if ((powerbufFree = calloc(powerbufLen +
447 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL)
448 goto err;
449 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
450
451 /* lay down tmp and am right after powers table */
452 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
453 am.d = tmp.d + top;
454 tmp.top = am.top = 0;
455 tmp.dmax = am.dmax = top;
456 tmp.neg = am.neg = 0;
457 tmp.flags = am.flags = BN_FLG_STATIC_DATA;
458
459 /* prepare a^0 in Montgomery domain */
460#if 1
461 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
462 goto err;
463#else
464 tmp.d[0] = (0 - m - >d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */
465 for (i = 1; i < top; i++)
466 tmp.d[i] = (~m->d[i]) & BN_MASK2;
467 tmp.top = top;
468#endif
469
470 /* prepare a^1 in Montgomery domain */
471 if (!BN_nnmod(&am, a, m, ctx))
472 goto err;
473 if (!BN_to_montgomery(&am, &am, mont, ctx))
474 goto err;
475
476#if defined(OPENSSL_BN_ASM_MONT5)
477 /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
478 * specifically optimization of cache-timing attack countermeasures
479 * and pre-computation optimization. */
480
481 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
482 * 512-bit RSA is hardly relevant, we omit it to spare size... */
483 if (window == 5 && top > 1) {
484 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
485 const void *table, const BN_ULONG *np,
486 const BN_ULONG *n0, int num, int power);
487 void bn_scatter5(const BN_ULONG *inp, size_t num,
488 void *table, size_t power);
489 void bn_gather5(BN_ULONG *out, size_t num,
490 void *table, size_t power);
491
492 BN_ULONG *np = mont->N.d, *n0 = mont->n0;
493
494 /* BN_to_montgomery can contaminate words above .top
495 * [in BN_DEBUG[_DEBUG] build]... */
496 for (i = am.top; i < top; i++)
497 am.d[i] = 0;
498 for (i = tmp.top; i < top; i++)
499 tmp.d[i] = 0;
500
501 bn_scatter5(tmp.d, top, powerbuf, 0);
502 bn_scatter5(am.d, am.top, powerbuf, 1);
503 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
504 bn_scatter5(tmp.d, top, powerbuf, 2);
505
506#if 0
507 for (i = 3; i < 32; i++) {
508 /* Calculate a^i = a^(i-1) * a */
509 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
510 n0, top, i - 1);
511 bn_scatter5(tmp.d, top, powerbuf, i);
512 }
513#else
514 /* same as above, but uses squaring for 1/2 of operations */
515 for (i = 4; i < 32; i*=2) {
516 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
517 bn_scatter5(tmp.d, top, powerbuf, i);
518 }
519 for (i = 3; i < 8; i += 2) {
520 int j;
521 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
522 n0, top, i - 1);
523 bn_scatter5(tmp.d, top, powerbuf, i);
524 for (j = 2 * i; j < 32; j *= 2) {
525 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
526 bn_scatter5(tmp.d, top, powerbuf, j);
527 }
528 }
529 for (; i < 16; i += 2) {
530 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
531 n0, top, i - 1);
532 bn_scatter5(tmp.d, top, powerbuf, i);
533 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
534 bn_scatter5(tmp.d, top, powerbuf, 2*i);
535 }
536 for (; i < 32; i += 2) {
537 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
538 n0, top, i - 1);
539 bn_scatter5(tmp.d, top, powerbuf, i);
540 }
541#endif
542 bits--;
543 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
544 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
545 bn_gather5(tmp.d, top, powerbuf, wvalue);
546
547 /* Scan the exponent one window at a time starting from the most
548 * significant bits.
549 */
550 while (bits >= 0) {
551 for (wvalue = 0, i = 0; i < 5; i++, bits--)
552 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
553
554 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
555 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
556 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
557 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
558 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
559 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
560 }
561
562 tmp.top = top;
563 bn_correct_top(&tmp);
564 } else
565#endif
566 {
567 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0,
568 window))
569 goto err;
570 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1,
571 window))
572 goto err;
573
574 /* If the window size is greater than 1, then calculate
575 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
576 * (even powers could instead be computed as (a^(i/2))^2
577 * to use the slight performance advantage of sqr over mul).
578 */
579 if (window > 1) {
580 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
581 goto err;
582 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf,
583 2, window))
584 goto err;
585 for (i = 3; i < numPowers; i++) {
586 /* Calculate a^i = a^(i-1) * a */
587 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp,
588 mont, ctx))
589 goto err;
590 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top,
591 powerbuf, i, window))
592 goto err;
593 }
594 }
595
596 bits--;
597 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
598 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
599 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf,
600 wvalue, window))
601 goto err;
602
603 /* Scan the exponent one window at a time starting from the most
604 * significant bits.
605 */
606 while (bits >= 0) {
607 wvalue = 0; /* The 'value' of the window */
608
609 /* Scan the window, squaring the result as we go */
610 for (i = 0; i < window; i++, bits--) {
611 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp,
612 mont, ctx))
613 goto err;
614 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
615 }
616
617 /* Fetch the appropriate pre-computed value from the pre-buf */
618 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf,
619 wvalue, window))
620 goto err;
621
622 /* Multiply the result into the intermediate result */
623 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
624 goto err;
625 }
626 }
627
628 /* Convert the final result from montgomery to standard format */
629 if (!BN_from_montgomery(rr, &tmp, mont, ctx))
630 goto err;
631
632 ret = 1;
633
634 err:
635 if (mont != in_mont)
636 BN_MONT_CTX_free(mont);
637 BN_CTX_end(ctx);
638 freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
639
640 return ret;
641}
642LCRYPTO_ALIAS(BN_mod_exp_mont_consttime);
643
644static int
645BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
646 BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct)
647{
648 int i, j, bits, ret = 0, wstart, wend, window, wvalue;
649 int start = 1;
650 BIGNUM *d, *r;
651 const BIGNUM *aa;
652 /* Table of variables obtained from 'ctx' */
653 BIGNUM *val[TABLE_SIZE];
654 BN_MONT_CTX *mont = NULL;
655
656 if (ct) {
657 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
658 }
659
660
661 if (!BN_is_odd(m)) {
662 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
663 return (0);
664 }
665
666 bits = BN_num_bits(p);
667 if (bits == 0) {
668 /* x**0 mod 1 is still zero. */
669 if (BN_abs_is_word(m, 1)) {
670 ret = 1;
671 BN_zero(rr);
672 } else
673 ret = BN_one(rr);
674 return ret;
675 }
676
677 BN_CTX_start(ctx);
678 if ((d = BN_CTX_get(ctx)) == NULL)
679 goto err;
680 if ((r = BN_CTX_get(ctx)) == NULL)
681 goto err;
682 if ((val[0] = BN_CTX_get(ctx)) == NULL)
683 goto err;
684
685 if ((mont = in_mont) == NULL)
686 mont = BN_MONT_CTX_create(m, ctx);
687 if (mont == NULL)
688 goto err;
689
690 if (!BN_nnmod(val[0], a,m, ctx))
691 goto err;
692 aa = val[0];
693 if (BN_is_zero(aa)) {
694 BN_zero(rr);
695 ret = 1;
696 goto err;
697 }
698 if (!BN_to_montgomery(val[0], aa, mont, ctx))
699 goto err;
700
701 window = BN_window_bits_for_exponent_size(bits);
702 if (window > 1) {
703 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
704 goto err;
705 j = 1 << (window - 1);
706 for (i = 1; i < j; i++) {
707 if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
708 !BN_mod_mul_montgomery(val[i], val[i - 1],
709 d, mont, ctx))
710 goto err;
711 }
712 }
713
714 start = 1; /* This is used to avoid multiplication etc
715 * when there is only the value '1' in the
716 * buffer. */
717 wvalue = 0; /* The 'value' of the window */
718 wstart = bits - 1; /* The top bit of the window */
719 wend = 0; /* The bottom bit of the window */
720
721 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
722 goto err;
723 for (;;) {
724 if (BN_is_bit_set(p, wstart) == 0) {
725 if (!start) {
726 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
727 goto err;
728 }
729 if (wstart == 0)
730 break;
731 wstart--;
732 continue;
733 }
734 /* We now have wstart on a 'set' bit, we now need to work out
735 * how bit a window to do. To do this we need to scan
736 * forward until the last set bit before the end of the
737 * window */
738 j = wstart;
739 wvalue = 1;
740 wend = 0;
741 for (i = 1; i < window; i++) {
742 if (wstart - i < 0)
743 break;
744 if (BN_is_bit_set(p, wstart - i)) {
745 wvalue <<= (i - wend);
746 wvalue |= 1;
747 wend = i;
748 }
749 }
750
751 /* wend is the size of the current window */
752 j = wend + 1;
753 /* add the 'bytes above' */
754 if (!start)
755 for (i = 0; i < j; i++) {
756 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
757 goto err;
758 }
759
760 /* wvalue will be an odd number < 2^window */
761 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
762 goto err;
763
764 /* move the 'window' down further */
765 wstart -= wend + 1;
766 wvalue = 0;
767 start = 0;
768 if (wstart < 0)
769 break;
770 }
771 if (!BN_from_montgomery(rr, r,mont, ctx))
772 goto err;
773
774 ret = 1;
775
776 err:
777 if (mont != in_mont)
778 BN_MONT_CTX_free(mont);
779 BN_CTX_end(ctx);
780
781 return ret;
782}
783
784int
785BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
786 BN_CTX *ctx, BN_MONT_CTX *in_mont)
787{
788 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont,
789 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
790}
791LCRYPTO_ALIAS(BN_mod_exp_mont);
792
793int
794BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
795 BN_CTX *ctx, BN_MONT_CTX *in_mont)
796{
797 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1);
798}
799
800int
801BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
802 BN_CTX *ctx, BN_MONT_CTX *in_mont)
803{
804 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0);
805}
806
807int
808BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m,
809 BN_CTX *ctx, BN_MONT_CTX *in_mont)
810{
811 BN_MONT_CTX *mont = NULL;
812 int b, bits, ret = 0;
813 int r_is_one;
814 BN_ULONG w, next_w;
815 BIGNUM *d, *r, *t;
816 BIGNUM *swap_tmp;
817
818#define BN_MOD_MUL_WORD(r, w, m) \
819 (BN_mul_word(r, (w)) && \
820 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
821 (BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
822 /* BN_MOD_MUL_WORD is only used with 'w' large,
823 * so the BN_ucmp test is probably more overhead
824 * than always using BN_mod (which uses bn_copy if
825 * a similar test returns true). */
826 /* We can use BN_mod and do not need BN_nnmod because our
827 * accumulator is never negative (the result of BN_mod does
828 * not depend on the sign of the modulus).
829 */
830#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
831 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
832
833 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
834 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
835 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
836 return -1;
837 }
838
839
840 if (!BN_is_odd(m)) {
841 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
842 return (0);
843 }
844 if (m->top == 1)
845 a %= m->d[0]; /* make sure that 'a' is reduced */
846
847 bits = BN_num_bits(p);
848 if (bits == 0) {
849 /* x**0 mod 1 is still zero. */
850 if (BN_abs_is_word(m, 1)) {
851 ret = 1;
852 BN_zero(rr);
853 } else
854 ret = BN_one(rr);
855 return ret;
856 }
857 if (a == 0) {
858 BN_zero(rr);
859 ret = 1;
860 return ret;
861 }
862
863 BN_CTX_start(ctx);
864 if ((d = BN_CTX_get(ctx)) == NULL)
865 goto err;
866 if ((r = BN_CTX_get(ctx)) == NULL)
867 goto err;
868 if ((t = BN_CTX_get(ctx)) == NULL)
869 goto err;
870
871 if ((mont = in_mont) == NULL)
872 mont = BN_MONT_CTX_create(m, ctx);
873 if (mont == NULL)
874 goto err;
875
876 r_is_one = 1; /* except for Montgomery factor */
877
878 /* bits-1 >= 0 */
879
880 /* The result is accumulated in the product r*w. */
881 w = a; /* bit 'bits-1' of 'p' is always set */
882 for (b = bits - 2; b >= 0; b--) {
883 /* First, square r*w. */
884 next_w = w * w;
885 if ((next_w / w) != w) /* overflow */
886 {
887 if (r_is_one) {
888 if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
889 goto err;
890 r_is_one = 0;
891 } else {
892 if (!BN_MOD_MUL_WORD(r, w, m))
893 goto err;
894 }
895 next_w = 1;
896 }
897 w = next_w;
898 if (!r_is_one) {
899 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
900 goto err;
901 }
902
903 /* Second, multiply r*w by 'a' if exponent bit is set. */
904 if (BN_is_bit_set(p, b)) {
905 next_w = w * a;
906 if ((next_w / a) != w) /* overflow */
907 {
908 if (r_is_one) {
909 if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
910 goto err;
911 r_is_one = 0;
912 } else {
913 if (!BN_MOD_MUL_WORD(r, w, m))
914 goto err;
915 }
916 next_w = a;
917 }
918 w = next_w;
919 }
920 }
921
922 /* Finally, set r:=r*w. */
923 if (w != 1) {
924 if (r_is_one) {
925 if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
926 goto err;
927 r_is_one = 0;
928 } else {
929 if (!BN_MOD_MUL_WORD(r, w, m))
930 goto err;
931 }
932 }
933
934 if (r_is_one) /* can happen only if a == 1*/
935 {
936 if (!BN_one(rr))
937 goto err;
938 } else {
939 if (!BN_from_montgomery(rr, r, mont, ctx))
940 goto err;
941 }
942
943 ret = 1;
944
945 err:
946 if (mont != in_mont)
947 BN_MONT_CTX_free(mont);
948 BN_CTX_end(ctx);
949
950 return ret;
951}
952
953int
954BN_mod_exp_reciprocal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
955 BN_CTX *ctx)
956{
957 int i, j, bits, wstart, wend, window, wvalue;
958 int start = 1;
959 BIGNUM *aa, *q;
960 /* Table of variables obtained from 'ctx' */
961 BIGNUM *val[TABLE_SIZE];
962 BN_RECP_CTX *recp = NULL;
963 int ret = 0;
964
965 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
966 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
967 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
968 return -1;
969 }
970
971 bits = BN_num_bits(p);
972 if (bits == 0) {
973 /* x**0 mod 1 is still zero. */
974 if (BN_abs_is_word(m, 1)) {
975 ret = 1;
976 BN_zero(r);
977 } else
978 ret = BN_one(r);
979 return ret;
980 }
981
982 BN_CTX_start(ctx);
983 if ((aa = BN_CTX_get(ctx)) == NULL)
984 goto err;
985 if ((q = BN_CTX_get(ctx)) == NULL)
986 goto err;
987 if ((val[0] = BN_CTX_get(ctx)) == NULL)
988 goto err;
989
990 if ((recp = BN_RECP_CTX_create(m)) == NULL)
991 goto err;
992
993 if (!BN_nnmod(val[0], a, m, ctx))
994 goto err;
995 if (BN_is_zero(val[0])) {
996 BN_zero(r);
997 goto done;
998 }
999 if (!bn_copy(q, p))
1000 goto err;
1001
1002 window = BN_window_bits_for_exponent_size(bits);
1003 if (window > 1) {
1004 if (!BN_mod_sqr_reciprocal(aa, val[0], recp, ctx))
1005 goto err;
1006 j = 1 << (window - 1);
1007 for (i = 1; i < j; i++) {
1008 if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1009 !BN_mod_mul_reciprocal(val[i], val[i - 1],
1010 aa, recp, ctx))
1011 goto err;
1012 }
1013 }
1014
1015 start = 1; /* This is used to avoid multiplication etc
1016 * when there is only the value '1' in the
1017 * buffer. */
1018 wvalue = 0; /* The 'value' of the window */
1019 wstart = bits - 1; /* The top bit of the window */
1020 wend = 0; /* The bottom bit of the window */
1021
1022 if (!BN_one(r))
1023 goto err;
1024
1025 for (;;) {
1026 if (BN_is_bit_set(q, wstart) == 0) {
1027 if (!start)
1028 if (!BN_mod_sqr_reciprocal(r, r, recp, ctx))
1029 goto err;
1030 if (wstart == 0)
1031 break;
1032 wstart--;
1033 continue;
1034 }
1035 /* We now have wstart on a 'set' bit, we now need to work out
1036 * how bit a window to do. To do this we need to scan
1037 * forward until the last set bit before the end of the
1038 * window */
1039 j = wstart;
1040 wvalue = 1;
1041 wend = 0;
1042 for (i = 1; i < window; i++) {
1043 if (wstart - i < 0)
1044 break;
1045 if (BN_is_bit_set(q, wstart - i)) {
1046 wvalue <<= (i - wend);
1047 wvalue |= 1;
1048 wend = i;
1049 }
1050 }
1051
1052 /* wend is the size of the current window */
1053 j = wend + 1;
1054 /* add the 'bytes above' */
1055 if (!start)
1056 for (i = 0; i < j; i++) {
1057 if (!BN_mod_sqr_reciprocal(r, r, recp, ctx))
1058 goto err;
1059 }
1060
1061 /* wvalue will be an odd number < 2^window */
1062 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], recp, ctx))
1063 goto err;
1064
1065 /* move the 'window' down further */
1066 wstart -= wend + 1;
1067 wvalue = 0;
1068 start = 0;
1069 if (wstart < 0)
1070 break;
1071 }
1072
1073 done:
1074 ret = 1;
1075
1076 err:
1077 BN_CTX_end(ctx);
1078 BN_RECP_CTX_free(recp);
1079
1080 return ret;
1081}
1082
1083static int
1084BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1085 BN_CTX *ctx, int ct)
1086{
1087 int ret;
1088
1089
1090 /* For even modulus m = 2^k*m_odd, it might make sense to compute
1091 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
1092 * exponentiation for the odd part), using appropriate exponent
1093 * reductions, and combine the results using the CRT.
1094 *
1095 * For now, we use Montgomery only if the modulus is odd; otherwise,
1096 * exponentiation using the reciprocal-based quick remaindering
1097 * algorithm is used.
1098 *
1099 * (Timing obtained with expspeed.c [computations a^p mod m
1100 * where a, p, m are of the same length: 256, 512, 1024, 2048,
1101 * 4096, 8192 bits], compared to the running time of the
1102 * standard algorithm:
1103 *
1104 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
1105 * 55 .. 77 % [UltraSparc processor, but
1106 * debug-solaris-sparcv8-gcc conf.]
1107 *
1108 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
1109 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
1110 *
1111 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
1112 * at 2048 and more bits, but at 512 and 1024 bits, it was
1113 * slower even than the standard algorithm!
1114 *
1115 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
1116 * should be obtained when the new Montgomery reduction code
1117 * has been integrated into OpenSSL.)
1118 */
1119
1120 if (BN_is_odd(m)) {
1121 if (a->top == 1 && !a->neg && !ct) {
1122 BN_ULONG A = a->d[0];
1123 ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL);
1124 } else
1125 ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL);
1126 } else {
1127 ret = BN_mod_exp_reciprocal(r, a,p, m, ctx);
1128 }
1129
1130 return (ret);
1131}
1132
1133int
1134BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1135 BN_CTX *ctx)
1136{
1137 return BN_mod_exp_internal(r, a, p, m, ctx,
1138 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
1139}
1140LCRYPTO_ALIAS(BN_mod_exp);
1141
1142int
1143BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1144 BN_CTX *ctx)
1145{
1146 return BN_mod_exp_internal(r, a, p, m, ctx, 1);
1147}
1148
1149int
1150BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1151 BN_CTX *ctx)
1152{
1153 return BN_mod_exp_internal(r, a, p, m, ctx, 0);
1154}
1155
1156int
1157BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
1158 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, BN_CTX *ctx,
1159 BN_MONT_CTX *in_mont)
1160{
1161 int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2, wvalue1, wvalue2;
1162 int r_is_one = 1;
1163 BIGNUM *d, *r;
1164 const BIGNUM *a_mod_m;
1165 /* Tables of variables obtained from 'ctx' */
1166 BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
1167 BN_MONT_CTX *mont = NULL;
1168
1169
1170 if (!BN_is_odd(m)) {
1171 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
1172 return (0);
1173 }
1174 bits1 = BN_num_bits(p1);
1175 bits2 = BN_num_bits(p2);
1176 if ((bits1 == 0) && (bits2 == 0)) {
1177 ret = BN_one(rr);
1178 return ret;
1179 }
1180
1181 bits = (bits1 > bits2) ? bits1 : bits2;
1182
1183 BN_CTX_start(ctx);
1184 if ((d = BN_CTX_get(ctx)) == NULL)
1185 goto err;
1186 if ((r = BN_CTX_get(ctx)) == NULL)
1187 goto err;
1188 if ((val1[0] = BN_CTX_get(ctx)) == NULL)
1189 goto err;
1190 if ((val2[0] = BN_CTX_get(ctx)) == NULL)
1191 goto err;
1192
1193 if ((mont = in_mont) == NULL)
1194 mont = BN_MONT_CTX_create(m, ctx);
1195 if (mont == NULL)
1196 goto err;
1197
1198 window1 = BN_window_bits_for_exponent_size(bits1);
1199 window2 = BN_window_bits_for_exponent_size(bits2);
1200
1201 /*
1202 * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1)
1203 */
1204 if (!BN_nnmod(val1[0], a1, m, ctx))
1205 goto err;
1206 a_mod_m = val1[0];
1207 if (BN_is_zero(a_mod_m)) {
1208 BN_zero(rr);
1209 ret = 1;
1210 goto err;
1211 }
1212
1213 if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx))
1214 goto err;
1215 if (window1 > 1) {
1216 if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx))
1217 goto err;
1218
1219 j = 1 << (window1 - 1);
1220 for (i = 1; i < j; i++) {
1221 if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
1222 !BN_mod_mul_montgomery(val1[i], val1[i - 1],
1223 d, mont, ctx))
1224 goto err;
1225 }
1226 }
1227
1228
1229 /*
1230 * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1)
1231 */
1232 if (!BN_nnmod(val2[0], a2, m, ctx))
1233 goto err;
1234 a_mod_m = val2[0];
1235 if (BN_is_zero(a_mod_m)) {
1236 BN_zero(rr);
1237 ret = 1;
1238 goto err;
1239 }
1240 if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx))
1241 goto err;
1242 if (window2 > 1) {
1243 if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx))
1244 goto err;
1245
1246 j = 1 << (window2 - 1);
1247 for (i = 1; i < j; i++) {
1248 if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
1249 !BN_mod_mul_montgomery(val2[i], val2[i - 1],
1250 d, mont, ctx))
1251 goto err;
1252 }
1253 }
1254
1255
1256 /* Now compute the power product, using independent windows. */
1257 r_is_one = 1;
1258 wvalue1 = 0; /* The 'value' of the first window */
1259 wvalue2 = 0; /* The 'value' of the second window */
1260 wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */
1261 wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */
1262
1263 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
1264 goto err;
1265 for (b = bits - 1; b >= 0; b--) {
1266 if (!r_is_one) {
1267 if (!BN_mod_mul_montgomery(r, r,r, mont, ctx))
1268 goto err;
1269 }
1270
1271 if (!wvalue1)
1272 if (BN_is_bit_set(p1, b)) {
1273 /* consider bits b-window1+1 .. b for this window */
1274 i = b - window1 + 1;
1275 while (!BN_is_bit_set(p1, i)) /* works for i<0 */
1276 i++;
1277 wpos1 = i;
1278 wvalue1 = 1;
1279 for (i = b - 1; i >= wpos1; i--) {
1280 wvalue1 <<= 1;
1281 if (BN_is_bit_set(p1, i))
1282 wvalue1++;
1283 }
1284 }
1285
1286 if (!wvalue2)
1287 if (BN_is_bit_set(p2, b)) {
1288 /* consider bits b-window2+1 .. b for this window */
1289 i = b - window2 + 1;
1290 while (!BN_is_bit_set(p2, i))
1291 i++;
1292 wpos2 = i;
1293 wvalue2 = 1;
1294 for (i = b - 1; i >= wpos2; i--) {
1295 wvalue2 <<= 1;
1296 if (BN_is_bit_set(p2, i))
1297 wvalue2++;
1298 }
1299 }
1300
1301 if (wvalue1 && b == wpos1) {
1302 /* wvalue1 is odd and < 2^window1 */
1303 if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1],
1304 mont, ctx))
1305 goto err;
1306 wvalue1 = 0;
1307 r_is_one = 0;
1308 }
1309
1310 if (wvalue2 && b == wpos2) {
1311 /* wvalue2 is odd and < 2^window2 */
1312 if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1],
1313 mont, ctx))
1314 goto err;
1315 wvalue2 = 0;
1316 r_is_one = 0;
1317 }
1318 }
1319 if (!BN_from_montgomery(rr, r,mont, ctx))
1320 goto err;
1321
1322 ret = 1;
1323
1324 err:
1325 if (mont != in_mont)
1326 BN_MONT_CTX_free(mont);
1327 BN_CTX_end(ctx);
1328
1329 return ret;
1330}
diff --git a/src/lib/libcrypto/bn/bn_gcd.c b/src/lib/libcrypto/bn/bn_gcd.c
deleted file mode 100644
index fa5d71a7f3..0000000000
--- a/src/lib/libcrypto/bn/bn_gcd.c
+++ /dev/null
@@ -1,818 +0,0 @@
1/* $OpenBSD: bn_gcd.c,v 1.29 2024/04/10 14:58:06 beck Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <openssl/err.h>
113
114#include "bn_local.h"
115
116static BIGNUM *
117euclid(BIGNUM *a, BIGNUM *b)
118{
119 BIGNUM *t;
120 int shifts = 0;
121
122 /* Loop invariant: 0 <= b <= a. */
123 while (!BN_is_zero(b)) {
124 if (BN_is_odd(a) && BN_is_odd(b)) {
125 if (!BN_sub(a, a, b))
126 goto err;
127 if (!BN_rshift1(a, a))
128 goto err;
129 } else if (BN_is_odd(a) && !BN_is_odd(b)) {
130 if (!BN_rshift1(b, b))
131 goto err;
132 } else if (!BN_is_odd(a) && BN_is_odd(b)) {
133 if (!BN_rshift1(a, a))
134 goto err;
135 } else {
136 if (!BN_rshift1(a, a))
137 goto err;
138 if (!BN_rshift1(b, b))
139 goto err;
140 shifts++;
141 continue;
142 }
143
144 if (BN_cmp(a, b) < 0) {
145 t = a;
146 a = b;
147 b = t;
148 }
149 }
150
151 if (shifts) {
152 if (!BN_lshift(a, a, shifts))
153 goto err;
154 }
155
156 return a;
157
158 err:
159 return NULL;
160}
161
162int
163BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
164{
165 BIGNUM *a, *b, *t;
166 int ret = 0;
167
168 BN_CTX_start(ctx);
169 if ((a = BN_CTX_get(ctx)) == NULL)
170 goto err;
171 if ((b = BN_CTX_get(ctx)) == NULL)
172 goto err;
173
174 if (!bn_copy(a, in_a))
175 goto err;
176 if (!bn_copy(b, in_b))
177 goto err;
178 a->neg = 0;
179 b->neg = 0;
180
181 if (BN_cmp(a, b) < 0) {
182 t = a;
183 a = b;
184 b = t;
185 }
186 t = euclid(a, b);
187 if (t == NULL)
188 goto err;
189
190 if (!bn_copy(r, t))
191 goto err;
192 ret = 1;
193
194 err:
195 BN_CTX_end(ctx);
196 return (ret);
197}
198LCRYPTO_ALIAS(BN_gcd);
199
200/*
201 * BN_gcd_no_branch is a special version of BN_mod_inverse_no_branch.
202 * that returns the GCD.
203 */
204static BIGNUM *
205BN_gcd_no_branch(BIGNUM *in, const BIGNUM *a, const BIGNUM *n,
206 BN_CTX *ctx)
207{
208 BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL;
209 BIGNUM local_A, local_B;
210 BIGNUM *pA, *pB;
211 BIGNUM *ret = NULL;
212 int sign;
213
214 if (in == NULL)
215 goto err;
216 R = in;
217
218 BN_init(&local_A);
219 BN_init(&local_B);
220
221 BN_CTX_start(ctx);
222 if ((A = BN_CTX_get(ctx)) == NULL)
223 goto err;
224 if ((B = BN_CTX_get(ctx)) == NULL)
225 goto err;
226 if ((X = BN_CTX_get(ctx)) == NULL)
227 goto err;
228 if ((D = BN_CTX_get(ctx)) == NULL)
229 goto err;
230 if ((M = BN_CTX_get(ctx)) == NULL)
231 goto err;
232 if ((Y = BN_CTX_get(ctx)) == NULL)
233 goto err;
234 if ((T = BN_CTX_get(ctx)) == NULL)
235 goto err;
236
237 if (!BN_one(X))
238 goto err;
239 BN_zero(Y);
240 if (!bn_copy(B, a))
241 goto err;
242 if (!bn_copy(A, n))
243 goto err;
244 A->neg = 0;
245
246 if (B->neg || (BN_ucmp(B, A) >= 0)) {
247 /*
248 * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
249 * BN_div_no_branch will be called eventually.
250 */
251 pB = &local_B;
252 /* BN_init() done at the top of the function. */
253 BN_with_flags(pB, B, BN_FLG_CONSTTIME);
254 if (!BN_nnmod(B, pB, A, ctx))
255 goto err;
256 }
257 sign = -1;
258 /* From B = a mod |n|, A = |n| it follows that
259 *
260 * 0 <= B < A,
261 * -sign*X*a == B (mod |n|),
262 * sign*Y*a == A (mod |n|).
263 */
264
265 while (!BN_is_zero(B)) {
266 BIGNUM *tmp;
267
268 /*
269 * 0 < B < A,
270 * (*) -sign*X*a == B (mod |n|),
271 * sign*Y*a == A (mod |n|)
272 */
273
274 /*
275 * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
276 * BN_div_no_branch will be called eventually.
277 */
278 pA = &local_A;
279 /* BN_init() done at the top of the function. */
280 BN_with_flags(pA, A, BN_FLG_CONSTTIME);
281
282 /* (D, M) := (A/B, A%B) ... */
283 if (!BN_div_ct(D, M, pA, B, ctx))
284 goto err;
285
286 /* Now
287 * A = D*B + M;
288 * thus we have
289 * (**) sign*Y*a == D*B + M (mod |n|).
290 */
291 tmp = A; /* keep the BIGNUM object, the value does not matter */
292
293 /* (A, B) := (B, A mod B) ... */
294 A = B;
295 B = M;
296 /* ... so we have 0 <= B < A again */
297
298 /* Since the former M is now B and the former B is now A,
299 * (**) translates into
300 * sign*Y*a == D*A + B (mod |n|),
301 * i.e.
302 * sign*Y*a - D*A == B (mod |n|).
303 * Similarly, (*) translates into
304 * -sign*X*a == A (mod |n|).
305 *
306 * Thus,
307 * sign*Y*a + D*sign*X*a == B (mod |n|),
308 * i.e.
309 * sign*(Y + D*X)*a == B (mod |n|).
310 *
311 * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
312 * -sign*X*a == B (mod |n|),
313 * sign*Y*a == A (mod |n|).
314 * Note that X and Y stay non-negative all the time.
315 */
316
317 if (!BN_mul(tmp, D, X, ctx))
318 goto err;
319 if (!BN_add(tmp, tmp, Y))
320 goto err;
321
322 M = Y; /* keep the BIGNUM object, the value does not matter */
323 Y = X;
324 X = tmp;
325 sign = -sign;
326 }
327
328 /*
329 * The while loop (Euclid's algorithm) ends when
330 * A == gcd(a,n);
331 */
332
333 if (!bn_copy(R, A))
334 goto err;
335 ret = R;
336 err:
337 if ((ret == NULL) && (in == NULL))
338 BN_free(R);
339 BN_CTX_end(ctx);
340 return (ret);
341}
342
343int
344BN_gcd_ct(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
345{
346 if (BN_gcd_no_branch(r, in_a, in_b, ctx) == NULL)
347 return 0;
348 return 1;
349}
350
351/* BN_mod_inverse_no_branch is a special version of BN_mod_inverse.
352 * It does not contain branches that may leak sensitive information.
353 */
354static BIGNUM *
355BN_mod_inverse_no_branch(BIGNUM *in, const BIGNUM *a, const BIGNUM *n,
356 BN_CTX *ctx)
357{
358 BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL;
359 BIGNUM local_A, local_B;
360 BIGNUM *pA, *pB;
361 BIGNUM *ret = NULL;
362 int sign;
363
364 BN_init(&local_A);
365 BN_init(&local_B);
366
367 BN_CTX_start(ctx);
368 if ((A = BN_CTX_get(ctx)) == NULL)
369 goto err;
370 if ((B = BN_CTX_get(ctx)) == NULL)
371 goto err;
372 if ((X = BN_CTX_get(ctx)) == NULL)
373 goto err;
374 if ((D = BN_CTX_get(ctx)) == NULL)
375 goto err;
376 if ((M = BN_CTX_get(ctx)) == NULL)
377 goto err;
378 if ((Y = BN_CTX_get(ctx)) == NULL)
379 goto err;
380 if ((T = BN_CTX_get(ctx)) == NULL)
381 goto err;
382
383 if (in == NULL)
384 R = BN_new();
385 else
386 R = in;
387 if (R == NULL)
388 goto err;
389
390 if (!BN_one(X))
391 goto err;
392 BN_zero(Y);
393 if (!bn_copy(B, a))
394 goto err;
395 if (!bn_copy(A, n))
396 goto err;
397 A->neg = 0;
398
399 if (B->neg || (BN_ucmp(B, A) >= 0)) {
400 /*
401 * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
402 * BN_div_no_branch will be called eventually.
403 */
404 pB = &local_B;
405 /* BN_init() done at the top of the function. */
406 BN_with_flags(pB, B, BN_FLG_CONSTTIME);
407 if (!BN_nnmod(B, pB, A, ctx))
408 goto err;
409 }
410 sign = -1;
411 /* From B = a mod |n|, A = |n| it follows that
412 *
413 * 0 <= B < A,
414 * -sign*X*a == B (mod |n|),
415 * sign*Y*a == A (mod |n|).
416 */
417
418 while (!BN_is_zero(B)) {
419 BIGNUM *tmp;
420
421 /*
422 * 0 < B < A,
423 * (*) -sign*X*a == B (mod |n|),
424 * sign*Y*a == A (mod |n|)
425 */
426
427 /*
428 * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
429 * BN_div_no_branch will be called eventually.
430 */
431 pA = &local_A;
432 /* BN_init() done at the top of the function. */
433 BN_with_flags(pA, A, BN_FLG_CONSTTIME);
434
435 /* (D, M) := (A/B, A%B) ... */
436 if (!BN_div_ct(D, M, pA, B, ctx))
437 goto err;
438
439 /* Now
440 * A = D*B + M;
441 * thus we have
442 * (**) sign*Y*a == D*B + M (mod |n|).
443 */
444 tmp = A; /* keep the BIGNUM object, the value does not matter */
445
446 /* (A, B) := (B, A mod B) ... */
447 A = B;
448 B = M;
449 /* ... so we have 0 <= B < A again */
450
451 /* Since the former M is now B and the former B is now A,
452 * (**) translates into
453 * sign*Y*a == D*A + B (mod |n|),
454 * i.e.
455 * sign*Y*a - D*A == B (mod |n|).
456 * Similarly, (*) translates into
457 * -sign*X*a == A (mod |n|).
458 *
459 * Thus,
460 * sign*Y*a + D*sign*X*a == B (mod |n|),
461 * i.e.
462 * sign*(Y + D*X)*a == B (mod |n|).
463 *
464 * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
465 * -sign*X*a == B (mod |n|),
466 * sign*Y*a == A (mod |n|).
467 * Note that X and Y stay non-negative all the time.
468 */
469
470 if (!BN_mul(tmp, D, X, ctx))
471 goto err;
472 if (!BN_add(tmp, tmp, Y))
473 goto err;
474
475 M = Y; /* keep the BIGNUM object, the value does not matter */
476 Y = X;
477 X = tmp;
478 sign = -sign;
479 }
480
481 /*
482 * The while loop (Euclid's algorithm) ends when
483 * A == gcd(a,n);
484 * we have
485 * sign*Y*a == A (mod |n|),
486 * where Y is non-negative.
487 */
488
489 if (sign < 0) {
490 if (!BN_sub(Y, n, Y))
491 goto err;
492 }
493 /* Now Y*a == A (mod |n|). */
494
495 if (!BN_is_one(A)) {
496 BNerror(BN_R_NO_INVERSE);
497 goto err;
498 }
499
500 if (!BN_nnmod(Y, Y, n, ctx))
501 goto err;
502 if (!bn_copy(R, Y))
503 goto err;
504
505 ret = R;
506
507 err:
508 if ((ret == NULL) && (in == NULL))
509 BN_free(R);
510 BN_CTX_end(ctx);
511 return (ret);
512}
513
514/* solves ax == 1 (mod n) */
515static BIGNUM *
516BN_mod_inverse_internal(BIGNUM *in, const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx,
517 int ct)
518{
519 BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL;
520 BIGNUM *ret = NULL;
521 int sign;
522
523 if (ct)
524 return BN_mod_inverse_no_branch(in, a, n, ctx);
525
526 BN_CTX_start(ctx);
527 if ((A = BN_CTX_get(ctx)) == NULL)
528 goto err;
529 if ((B = BN_CTX_get(ctx)) == NULL)
530 goto err;
531 if ((X = BN_CTX_get(ctx)) == NULL)
532 goto err;
533 if ((D = BN_CTX_get(ctx)) == NULL)
534 goto err;
535 if ((M = BN_CTX_get(ctx)) == NULL)
536 goto err;
537 if ((Y = BN_CTX_get(ctx)) == NULL)
538 goto err;
539 if ((T = BN_CTX_get(ctx)) == NULL)
540 goto err;
541
542 if (in == NULL)
543 R = BN_new();
544 else
545 R = in;
546 if (R == NULL)
547 goto err;
548
549 if (!BN_one(X))
550 goto err;
551 BN_zero(Y);
552 if (!bn_copy(B, a))
553 goto err;
554 if (!bn_copy(A, n))
555 goto err;
556 A->neg = 0;
557 if (B->neg || (BN_ucmp(B, A) >= 0)) {
558 if (!BN_nnmod(B, B, A, ctx))
559 goto err;
560 }
561 sign = -1;
562 /* From B = a mod |n|, A = |n| it follows that
563 *
564 * 0 <= B < A,
565 * -sign*X*a == B (mod |n|),
566 * sign*Y*a == A (mod |n|).
567 */
568
569 if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048))) {
570 /* Binary inversion algorithm; requires odd modulus.
571 * This is faster than the general algorithm if the modulus
572 * is sufficiently small (about 400 .. 500 bits on 32-bit
573 * systems, but much more on 64-bit systems) */
574 int shift;
575
576 while (!BN_is_zero(B)) {
577 /*
578 * 0 < B < |n|,
579 * 0 < A <= |n|,
580 * (1) -sign*X*a == B (mod |n|),
581 * (2) sign*Y*a == A (mod |n|)
582 */
583
584 /* Now divide B by the maximum possible power of two in the integers,
585 * and divide X by the same value mod |n|.
586 * When we're done, (1) still holds. */
587 shift = 0;
588 while (!BN_is_bit_set(B, shift)) /* note that 0 < B */
589 {
590 shift++;
591
592 if (BN_is_odd(X)) {
593 if (!BN_uadd(X, X, n))
594 goto err;
595 }
596 /* now X is even, so we can easily divide it by two */
597 if (!BN_rshift1(X, X))
598 goto err;
599 }
600 if (shift > 0) {
601 if (!BN_rshift(B, B, shift))
602 goto err;
603 }
604
605 /* Same for A and Y. Afterwards, (2) still holds. */
606 shift = 0;
607 while (!BN_is_bit_set(A, shift)) /* note that 0 < A */
608 {
609 shift++;
610
611 if (BN_is_odd(Y)) {
612 if (!BN_uadd(Y, Y, n))
613 goto err;
614 }
615 /* now Y is even */
616 if (!BN_rshift1(Y, Y))
617 goto err;
618 }
619 if (shift > 0) {
620 if (!BN_rshift(A, A, shift))
621 goto err;
622 }
623
624 /* We still have (1) and (2).
625 * Both A and B are odd.
626 * The following computations ensure that
627 *
628 * 0 <= B < |n|,
629 * 0 < A < |n|,
630 * (1) -sign*X*a == B (mod |n|),
631 * (2) sign*Y*a == A (mod |n|),
632 *
633 * and that either A or B is even in the next iteration.
634 */
635 if (BN_ucmp(B, A) >= 0) {
636 /* -sign*(X + Y)*a == B - A (mod |n|) */
637 if (!BN_uadd(X, X, Y))
638 goto err;
639 /* NB: we could use BN_mod_add_quick(X, X, Y, n), but that
640 * actually makes the algorithm slower */
641 if (!BN_usub(B, B, A))
642 goto err;
643 } else {
644 /* sign*(X + Y)*a == A - B (mod |n|) */
645 if (!BN_uadd(Y, Y, X))
646 goto err;
647 /* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */
648 if (!BN_usub(A, A, B))
649 goto err;
650 }
651 }
652 } else {
653 /* general inversion algorithm */
654
655 while (!BN_is_zero(B)) {
656 BIGNUM *tmp;
657
658 /*
659 * 0 < B < A,
660 * (*) -sign*X*a == B (mod |n|),
661 * sign*Y*a == A (mod |n|)
662 */
663
664 /* (D, M) := (A/B, A%B) ... */
665 if (BN_num_bits(A) == BN_num_bits(B)) {
666 if (!BN_one(D))
667 goto err;
668 if (!BN_sub(M, A, B))
669 goto err;
670 } else if (BN_num_bits(A) == BN_num_bits(B) + 1) {
671 /* A/B is 1, 2, or 3 */
672 if (!BN_lshift1(T, B))
673 goto err;
674 if (BN_ucmp(A, T) < 0) {
675 /* A < 2*B, so D=1 */
676 if (!BN_one(D))
677 goto err;
678 if (!BN_sub(M, A, B))
679 goto err;
680 } else {
681 /* A >= 2*B, so D=2 or D=3 */
682 if (!BN_sub(M, A, T))
683 goto err;
684 if (!BN_add(D,T,B)) goto err; /* use D (:= 3*B) as temp */
685 if (BN_ucmp(A, D) < 0) {
686 /* A < 3*B, so D=2 */
687 if (!BN_set_word(D, 2))
688 goto err;
689 /* M (= A - 2*B) already has the correct value */
690 } else {
691 /* only D=3 remains */
692 if (!BN_set_word(D, 3))
693 goto err;
694 /* currently M = A - 2*B, but we need M = A - 3*B */
695 if (!BN_sub(M, M, B))
696 goto err;
697 }
698 }
699 } else {
700 if (!BN_div_nonct(D, M, A, B, ctx))
701 goto err;
702 }
703
704 /* Now
705 * A = D*B + M;
706 * thus we have
707 * (**) sign*Y*a == D*B + M (mod |n|).
708 */
709 tmp = A; /* keep the BIGNUM object, the value does not matter */
710
711 /* (A, B) := (B, A mod B) ... */
712 A = B;
713 B = M;
714 /* ... so we have 0 <= B < A again */
715
716 /* Since the former M is now B and the former B is now A,
717 * (**) translates into
718 * sign*Y*a == D*A + B (mod |n|),
719 * i.e.
720 * sign*Y*a - D*A == B (mod |n|).
721 * Similarly, (*) translates into
722 * -sign*X*a == A (mod |n|).
723 *
724 * Thus,
725 * sign*Y*a + D*sign*X*a == B (mod |n|),
726 * i.e.
727 * sign*(Y + D*X)*a == B (mod |n|).
728 *
729 * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
730 * -sign*X*a == B (mod |n|),
731 * sign*Y*a == A (mod |n|).
732 * Note that X and Y stay non-negative all the time.
733 */
734
735 /* most of the time D is very small, so we can optimize tmp := D*X+Y */
736 if (BN_is_one(D)) {
737 if (!BN_add(tmp, X, Y))
738 goto err;
739 } else {
740 if (BN_is_word(D, 2)) {
741 if (!BN_lshift1(tmp, X))
742 goto err;
743 } else if (BN_is_word(D, 4)) {
744 if (!BN_lshift(tmp, X, 2))
745 goto err;
746 } else if (D->top == 1) {
747 if (!bn_copy(tmp, X))
748 goto err;
749 if (!BN_mul_word(tmp, D->d[0]))
750 goto err;
751 } else {
752 if (!BN_mul(tmp, D,X, ctx))
753 goto err;
754 }
755 if (!BN_add(tmp, tmp, Y))
756 goto err;
757 }
758
759 M = Y; /* keep the BIGNUM object, the value does not matter */
760 Y = X;
761 X = tmp;
762 sign = -sign;
763 }
764 }
765
766 /*
767 * The while loop (Euclid's algorithm) ends when
768 * A == gcd(a,n);
769 * we have
770 * sign*Y*a == A (mod |n|),
771 * where Y is non-negative.
772 */
773
774 if (sign < 0) {
775 if (!BN_sub(Y, n, Y))
776 goto err;
777 }
778 /* Now Y*a == A (mod |n|). */
779
780 if (!BN_is_one(A)) {
781 BNerror(BN_R_NO_INVERSE);
782 goto err;
783 }
784
785 if (!BN_nnmod(Y, Y, n, ctx))
786 goto err;
787 if (!bn_copy(R, Y))
788 goto err;
789
790 ret = R;
791
792 err:
793 if ((ret == NULL) && (in == NULL))
794 BN_free(R);
795 BN_CTX_end(ctx);
796 return (ret);
797}
798
799BIGNUM *
800BN_mod_inverse(BIGNUM *in, const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
801{
802 int ct = ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0) ||
803 (BN_get_flags(n, BN_FLG_CONSTTIME) != 0));
804 return BN_mod_inverse_internal(in, a, n, ctx, ct);
805}
806LCRYPTO_ALIAS(BN_mod_inverse);
807
808BIGNUM *
809BN_mod_inverse_nonct(BIGNUM *in, const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
810{
811 return BN_mod_inverse_internal(in, a, n, ctx, 0);
812}
813
814BIGNUM *
815BN_mod_inverse_ct(BIGNUM *in, const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
816{
817 return BN_mod_inverse_internal(in, a, n, ctx, 1);
818}
diff --git a/src/lib/libcrypto/bn/bn_internal.h b/src/lib/libcrypto/bn/bn_internal.h
deleted file mode 100644
index fd04bc9f8a..0000000000
--- a/src/lib/libcrypto/bn/bn_internal.h
+++ /dev/null
@@ -1,568 +0,0 @@
1/* $OpenBSD: bn_internal.h,v 1.15 2023/06/25 11:42:26 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#include "bn_arch.h"
21
22#ifndef HEADER_BN_INTERNAL_H
23#define HEADER_BN_INTERNAL_H
24
25int bn_word_clz(BN_ULONG w);
26
27int bn_bitsize(const BIGNUM *bn);
28
29#ifndef HAVE_BN_CT_NE_ZERO
30static inline int
31bn_ct_ne_zero(BN_ULONG w)
32{
33 return (w | ~(w - 1)) >> (BN_BITS2 - 1);
34}
35#endif
36
37#ifndef HAVE_BN_CT_NE_ZERO_MASK
38static inline BN_ULONG
39bn_ct_ne_zero_mask(BN_ULONG w)
40{
41 return 0 - bn_ct_ne_zero(w);
42}
43#endif
44
45#ifndef HAVE_BN_CT_EQ_ZERO
46static inline int
47bn_ct_eq_zero(BN_ULONG w)
48{
49 return 1 - bn_ct_ne_zero(w);
50}
51#endif
52
53#ifndef HAVE_BN_CT_EQ_ZERO_MASK
54static inline BN_ULONG
55bn_ct_eq_zero_mask(BN_ULONG w)
56{
57 return 0 - bn_ct_eq_zero(w);
58}
59#endif
60
61#ifndef HAVE_BN_CLZW
62static inline int
63bn_clzw(BN_ULONG w)
64{
65 return bn_word_clz(w);
66}
67#endif
68
69/*
70 * Big number primitives are named as the operation followed by a suffix
71 * that indicates the number of words that it operates on, where 'w' means
72 * single word, 'dw' means double word, 'tw' means triple word and 'qw' means
73 * quadruple word. Unless otherwise noted, the size of the output is implied
74 * based on its inputs, for example bn_mulw() takes two single word inputs
75 * and is going to produce a double word result.
76 *
77 * Where a function implements multiple operations, these are listed in order.
78 * For example, a function that computes (r1:r0) = a * b + c is named
79 * bn_mulw_addw(), producing a double word result.
80 */
81
82/*
83 * Default implementations for BN_ULLONG architectures.
84 *
85 * On these platforms the C compiler is generally better at optimising without
86 * the use of inline assembly primitives. However, it can be difficult for the
87 * compiler to see through primitives in order to combine operations, due to
88 * type changes/narrowing. For this reason compound primitives are usually
89 * explicitly provided.
90 */
91#ifdef BN_ULLONG
92
93#ifndef HAVE_BN_ADDW
94#define HAVE_BN_ADDW
95static inline void
96bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
97{
98 BN_ULLONG r;
99
100 r = (BN_ULLONG)a + (BN_ULLONG)b;
101
102 *out_r1 = r >> BN_BITS2;
103 *out_r0 = r & BN_MASK2;
104}
105#endif
106
107#ifndef HAVE_BN_ADDW_ADDW
108#define HAVE_BN_ADDW_ADDW
109static inline void
110bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
111 BN_ULONG *out_r0)
112{
113 BN_ULLONG r;
114
115 r = (BN_ULLONG)a + (BN_ULLONG)b + (BN_ULLONG)c;
116
117 *out_r1 = r >> BN_BITS2;
118 *out_r0 = r & BN_MASK2;
119}
120#endif
121
122#ifndef HAVE_BN_MULW
123#define HAVE_BN_MULW
124static inline void
125bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
126{
127 BN_ULLONG r;
128
129 r = (BN_ULLONG)a * (BN_ULLONG)b;
130
131 *out_r1 = r >> BN_BITS2;
132 *out_r0 = r & BN_MASK2;
133}
134#endif
135
136#ifndef HAVE_BN_MULW_ADDW
137#define HAVE_BN_MULW_ADDW
138static inline void
139bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
140 BN_ULONG *out_r0)
141{
142 BN_ULLONG r;
143
144 r = (BN_ULLONG)a * (BN_ULLONG)b + (BN_ULLONG)c;
145
146 *out_r1 = r >> BN_BITS2;
147 *out_r0 = r & BN_MASK2;
148}
149#endif
150
151#ifndef HAVE_BN_MULW_ADDW_ADDW
152#define HAVE_BN_MULW_ADDW_ADDW
153static inline void
154bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
155 BN_ULONG *out_r1, BN_ULONG *out_r0)
156{
157 BN_ULLONG r;
158
159 r = (BN_ULLONG)a * (BN_ULLONG)b + (BN_ULLONG)c + (BN_ULLONG)d;
160
161 *out_r1 = r >> BN_BITS2;
162 *out_r0 = r & BN_MASK2;
163}
164#endif
165
166#endif /* BN_ULLONG */
167
168/*
169 * bn_addw() computes (r1:r0) = a + b, where both inputs are single words,
170 * producing a double word result. The value of r1 is the carry from the
171 * addition.
172 */
173#ifndef HAVE_BN_ADDW
174static inline void
175bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
176{
177 BN_ULONG r1, r0, c1, c2;
178
179 c1 = a | b;
180 c2 = a & b;
181 r0 = a + b;
182 r1 = ((c1 & ~r0) | c2) >> (BN_BITS2 - 1); /* carry */
183
184 *out_r1 = r1;
185 *out_r0 = r0;
186}
187#endif
188
189/*
190 * bn_addw_addw() computes (r1:r0) = a + b + c, where all inputs are single
191 * words, producing a double word result.
192 */
193#ifndef HAVE_BN_ADDW_ADDW
194static inline void
195bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
196 BN_ULONG *out_r0)
197{
198 BN_ULONG carry, r1, r0;
199
200 bn_addw(a, b, &r1, &r0);
201 bn_addw(r0, c, &carry, &r0);
202 r1 += carry;
203
204 *out_r1 = r1;
205 *out_r0 = r0;
206}
207#endif
208
209/*
210 * bn_qwaddqw() computes
211 * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) + (b3:b2:b1:b0) + carry, where a is a quad word,
212 * b is a quad word, and carry is a single word with value 0 or 1, producing a four
213 * word result and carry.
214 */
215#ifndef HAVE_BN_QWADDQW
216static inline void
217bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
218 BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
219 BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
220{
221 BN_ULONG r3, r2, r1, r0;
222
223 bn_addw_addw(a0, b0, carry, &carry, &r0);
224 bn_addw_addw(a1, b1, carry, &carry, &r1);
225 bn_addw_addw(a2, b2, carry, &carry, &r2);
226 bn_addw_addw(a3, b3, carry, &carry, &r3);
227
228 *out_carry = carry;
229 *out_r3 = r3;
230 *out_r2 = r2;
231 *out_r1 = r1;
232 *out_r0 = r0;
233}
234#endif
235
236/*
237 * bn_subw() computes r0 = a - b, where both inputs are single words,
238 * producing a single word result and borrow.
239 */
240#ifndef HAVE_BN_SUBW
241static inline void
242bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
243{
244 BN_ULONG borrow, r0;
245
246 r0 = a - b;
247 borrow = ((r0 | (b & ~a)) & (b | ~a)) >> (BN_BITS2 - 1);
248
249 *out_borrow = borrow;
250 *out_r0 = r0;
251}
252#endif
253
254/*
255 * bn_subw_subw() computes r0 = a - b - c, where all inputs are single words,
256 * producing a single word result and borrow.
257 */
258#ifndef HAVE_BN_SUBW_SUBW
259static inline void
260bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
261 BN_ULONG *out_r0)
262{
263 BN_ULONG b1, b2, r0;
264
265 bn_subw(a, b, &b1, &r0);
266 bn_subw(r0, c, &b2, &r0);
267
268 *out_borrow = b1 + b2;
269 *out_r0 = r0;
270}
271#endif
272
273/*
274 * bn_qwsubqw() computes
275 * (r3:r2:r1:r0) = (a3:a2:a1:a0) - (b3:b2:b1:b0) - borrow, where a is a quad word,
276 * b is a quad word, and borrow is a single word with value 0 or 1, producing a
277 * four word result and borrow.
278 */
279#ifndef HAVE_BN_QWSUBQW
280static inline void
281bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
282 BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
283 BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
284{
285 BN_ULONG r3, r2, r1, r0;
286
287 bn_subw_subw(a0, b0, borrow, &borrow, &r0);
288 bn_subw_subw(a1, b1, borrow, &borrow, &r1);
289 bn_subw_subw(a2, b2, borrow, &borrow, &r2);
290 bn_subw_subw(a3, b3, borrow, &borrow, &r3);
291
292 *out_borrow = borrow;
293 *out_r3 = r3;
294 *out_r2 = r2;
295 *out_r1 = r1;
296 *out_r0 = r0;
297}
298#endif
299
300/*
301 * bn_mulw() computes (r1:r0) = a * b, where both inputs are single words,
302 * producing a double word result.
303 */
304#ifndef HAVE_BN_MULW
305/*
306 * Multiply two words (a * b) producing a double word result (h:l).
307 *
308 * This can be rewritten as:
309 *
310 * a * b = (hi32(a) * 2^32 + lo32(a)) * (hi32(b) * 2^32 + lo32(b))
311 * = hi32(a) * hi32(b) * 2^64 +
312 * hi32(a) * lo32(b) * 2^32 +
313 * hi32(b) * lo32(a) * 2^32 +
314 * lo32(a) * lo32(b)
315 *
316 * Each of the four partial products above can be calculated without
317 * overflowing a BN_ULONG, as the product of two 32 bit values needs at most
318 * 32 + 32 = 64 bits. Once these
319 * multiplications have been performed the result can be partitioned and summed
320 * into a double word (h:l). The same applies on a 32 bit system, substituting
321 * 16 for 32 and 32 for 64.
322 */
323#if 1
324static inline void
325bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
326{
327 BN_ULONG a1, a0, b1, b0, r1, r0;
328 BN_ULONG carry, x;
329
330 a1 = a >> BN_BITS4;
331 a0 = a & BN_MASK2l;
332 b1 = b >> BN_BITS4;
333 b0 = b & BN_MASK2l;
334
335 r1 = a1 * b1;
336 r0 = a0 * b0;
337
338 /* (a1 * b0) << BN_BITS4, partition the result across r1:r0 with carry. */
339 x = a1 * b0;
340 r1 += x >> BN_BITS4;
341 bn_addw(r0, x << BN_BITS4, &carry, &r0);
342 r1 += carry;
343
344 /* (b1 * a0) << BN_BITS4, partition the result across r1:r0 with carry. */
345 x = b1 * a0;
346 r1 += x >> BN_BITS4;
347 bn_addw(r0, x << BN_BITS4, &carry, &r0);
348 r1 += carry;
349
350 *out_r1 = r1;
351 *out_r0 = r0;
352}
353#else
354
355/*
356 * XXX - this accumulator based version uses fewer instructions, however
357 * requires more variables/registers. It seems to be slower on at least amd64
358 * and i386, however may be faster on other architectures that have more
359 * registers available. Further testing is required and one of the two
360 * implementations should eventually be removed.
361 */
362static inline void
363bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
364{
365 BN_ULONG a1, a0, b1, b0, r1, r0, x;
366 BN_ULONG acc0, acc1, acc2, acc3;
367
368 a1 = a >> BN_BITS4;
369 b1 = b >> BN_BITS4;
370 a0 = a & BN_MASK2l;
371 b0 = b & BN_MASK2l;
372
373 r1 = a1 * b1;
374 r0 = a0 * b0;
375
376 acc0 = r0 & BN_MASK2l;
377 acc1 = r0 >> BN_BITS4;
378 acc2 = r1 & BN_MASK2l;
379 acc3 = r1 >> BN_BITS4;
380
381 /* (a1 * b0) << BN_BITS4, partition the result across r1:r0. */
382 x = a1 * b0;
383 acc1 += x & BN_MASK2l;
384 acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4);
385 acc1 &= BN_MASK2l;
386 acc3 += acc2 >> BN_BITS4;
387 acc2 &= BN_MASK2l;
388
389 /* (b1 * a0) << BN_BITS4, partition the result across r1:r0. */
390 x = b1 * a0;
391 acc1 += x & BN_MASK2l;
392 acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4);
393 acc1 &= BN_MASK2l;
394 acc3 += acc2 >> BN_BITS4;
395 acc2 &= BN_MASK2l;
396
397 *out_r1 = (acc3 << BN_BITS4) | acc2;
398 *out_r0 = (acc1 << BN_BITS4) | acc0;
399}
400#endif
401#endif
402
403#ifndef HAVE_BN_MULW_LO
404static inline BN_ULONG
405bn_mulw_lo(BN_ULONG a, BN_ULONG b)
406{
407 return a * b;
408}
409#endif
410
411#ifndef HAVE_BN_MULW_HI
412static inline BN_ULONG
413bn_mulw_hi(BN_ULONG a, BN_ULONG b)
414{
415 BN_ULONG h, l;
416
417 bn_mulw(a, b, &h, &l);
418
419 return h;
420}
421#endif
422
423/*
424 * bn_mulw_addw() computes (r1:r0) = a * b + c with all inputs being single
425 * words, producing a double word result.
426 */
427#ifndef HAVE_BN_MULW_ADDW
428static inline void
429bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
430 BN_ULONG *out_r0)
431{
432 BN_ULONG carry, r1, r0;
433
434 bn_mulw(a, b, &r1, &r0);
435 bn_addw(r0, c, &carry, &r0);
436 r1 += carry;
437
438 *out_r1 = r1;
439 *out_r0 = r0;
440}
441#endif
442
443/*
444 * bn_mulw_addw_addw() computes (r1:r0) = a * b + c + d with all inputs being
445 * single words, producing a double word result.
446 */
447#ifndef HAVE_BN_MULW_ADDW_ADDW
448static inline void
449bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
450 BN_ULONG *out_r1, BN_ULONG *out_r0)
451{
452 BN_ULONG carry, r1, r0;
453
454 bn_mulw_addw(a, b, c, &r1, &r0);
455 bn_addw(r0, d, &carry, &r0);
456 r1 += carry;
457
458 *out_r1 = r1;
459 *out_r0 = r0;
460}
461#endif
462
463/*
464 * bn_mulw_addtw() computes (r2:r1:r0) = a * b + (c2:c1:c0), where a and b are
465 * single words and (c2:c1:c0) is a triple word, producing a triple word result.
466 * The caller must ensure that the inputs provided do not result in c2
467 * overflowing.
468 */
469#ifndef HAVE_BN_MULW_ADDTW
470static inline void
471bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
472 BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
473{
474 BN_ULONG carry, r2, r1, r0, x1;
475
476 bn_mulw_addw(a, b, c0, &x1, &r0);
477 bn_addw(c1, x1, &carry, &r1);
478 r2 = c2 + carry;
479
480 *out_r2 = r2;
481 *out_r1 = r1;
482 *out_r0 = r0;
483}
484#endif
485
486/*
487 * bn_mul2_mulw_addtw() computes (r2:r1:r0) = 2 * a * b + (c2:c1:c0), where a
488 * and b are single words and (c2:c1:c0) is a triple word, producing a triple
489 * word result. The caller must ensure that the inputs provided do not result
490 * in c2 overflowing.
491 */
492#ifndef HAVE_BN_MUL2_MULW_ADDTW
493static inline void
494bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
495 BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
496{
497 BN_ULONG r2, r1, r0, x1, x0;
498 BN_ULONG carry;
499
500 bn_mulw(a, b, &x1, &x0);
501 bn_addw(c0, x0, &carry, &r0);
502 bn_addw(c1, x1 + carry, &r2, &r1);
503 bn_addw(c2, r2, &carry, &r2);
504 bn_addw(r0, x0, &carry, &r0);
505 bn_addw(r1, x1 + carry, &carry, &r1);
506 r2 += carry;
507
508 *out_r2 = r2;
509 *out_r1 = r1;
510 *out_r0 = r0;
511}
512#endif
513
514/*
515 * bn_qwmulw_addw() computes (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + c, where a
516 * is a quad word, b is a single word and c is a single word, producing a five
517 * word result.
518 */
519#ifndef HAVE_BN_QWMULW_ADDW
520static inline void
521bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
522 BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
523 BN_ULONG *out_r1, BN_ULONG *out_r0)
524{
525 BN_ULONG r3, r2, r1, r0;
526
527 bn_mulw_addw(a0, b, c, &c, &r0);
528 bn_mulw_addw(a1, b, c, &c, &r1);
529 bn_mulw_addw(a2, b, c, &c, &r2);
530 bn_mulw_addw(a3, b, c, &c, &r3);
531
532 *out_r4 = c;
533 *out_r3 = r3;
534 *out_r2 = r2;
535 *out_r1 = r1;
536 *out_r0 = r0;
537}
538#endif
539
540/*
541 * bn_qwmulw_addqw_addw() computes
542 * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + (c3:c2:c1:c0) + d, where a
543 * is a quad word, b is a single word, c is a quad word, and d is a single word,
544 * producing a five word result.
545 */
546#ifndef HAVE_BN_QWMULW_ADDQW_ADDW
547static inline void
548bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
549 BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
550 BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
551 BN_ULONG *out_r0)
552{
553 BN_ULONG r3, r2, r1, r0;
554
555 bn_mulw_addw_addw(a0, b, c0, d, &d, &r0);
556 bn_mulw_addw_addw(a1, b, c1, d, &d, &r1);
557 bn_mulw_addw_addw(a2, b, c2, d, &d, &r2);
558 bn_mulw_addw_addw(a3, b, c3, d, &d, &r3);
559
560 *out_r4 = d;
561 *out_r3 = r3;
562 *out_r2 = r2;
563 *out_r1 = r1;
564 *out_r0 = r0;
565}
566#endif
567
568#endif
diff --git a/src/lib/libcrypto/bn/bn_isqrt.c b/src/lib/libcrypto/bn/bn_isqrt.c
deleted file mode 100644
index 018d5f34bd..0000000000
--- a/src/lib/libcrypto/bn/bn_isqrt.c
+++ /dev/null
@@ -1,234 +0,0 @@
1/* $OpenBSD: bn_isqrt.c,v 1.10 2023/06/04 17:28:35 tb Exp $ */
2/*
3 * Copyright (c) 2022 Theo Buehler <tb@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <stddef.h>
19#include <stdint.h>
20
21#include <openssl/bn.h>
22#include <openssl/err.h>
23
24#include "bn_local.h"
25#include "crypto_internal.h"
26
27/*
28 * Calculate integer square root of |n| using a variant of Newton's method.
29 *
30 * Returns the integer square root of |n| in the caller-provided |out_sqrt|;
31 * |*out_perfect| is set to 1 if and only if |n| is a perfect square.
32 * One of |out_sqrt| and |out_perfect| can be NULL; |in_ctx| can be NULL.
33 *
34 * Returns 0 on error, 1 on success.
35 *
36 * Adapted from pure Python describing cpython's math.isqrt(), without bothering
37 * with any of the optimizations in the C code. A correctness proof is here:
38 * https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
39 * The comments in the Python code also give a rather detailed proof.
40 */
41
42int
43bn_isqrt(BIGNUM *out_sqrt, int *out_perfect, const BIGNUM *n, BN_CTX *in_ctx)
44{
45 BN_CTX *ctx = NULL;
46 BIGNUM *a, *b;
47 int c, d, e, s;
48 int cmp, perfect;
49 int ret = 0;
50
51 if (out_perfect == NULL && out_sqrt == NULL) {
52 BNerror(ERR_R_PASSED_NULL_PARAMETER);
53 goto err;
54 }
55
56 if (BN_is_negative(n)) {
57 BNerror(BN_R_INVALID_RANGE);
58 goto err;
59 }
60
61 if ((ctx = in_ctx) == NULL)
62 ctx = BN_CTX_new();
63 if (ctx == NULL)
64 goto err;
65
66 BN_CTX_start(ctx);
67
68 if ((a = BN_CTX_get(ctx)) == NULL)
69 goto err;
70 if ((b = BN_CTX_get(ctx)) == NULL)
71 goto err;
72
73 if (BN_is_zero(n)) {
74 perfect = 1;
75 BN_zero(a);
76 goto done;
77 }
78
79 if (!BN_one(a))
80 goto err;
81
82 c = (BN_num_bits(n) - 1) / 2;
83 d = 0;
84
85 /* Calculate s = floor(log(c)). */
86 if (!BN_set_word(b, c))
87 goto err;
88 s = BN_num_bits(b) - 1;
89
90 /*
91 * By definition, the loop below is run <= floor(log(log(n))) times.
92 * Comments in the cpython code establish the loop invariant that
93 *
94 * (a - 1)^2 < n / 4^(c - d) < (a + 1)^2
95 *
96 * holds true in every iteration. Once this is proved via induction,
97 * correctness of the algorithm is easy.
98 *
99 * Roughly speaking, A = (a << (d - e)) is used for one Newton step
100 * "a = (A >> 1) + (m >> 1) / A" approximating m = (n >> 2 * (c - d)).
101 */
102
103 for (; s >= 0; s--) {
104 e = d;
105 d = c >> s;
106
107 if (!BN_rshift(b, n, 2 * c - d - e + 1))
108 goto err;
109
110 if (!BN_div_ct(b, NULL, b, a, ctx))
111 goto err;
112
113 if (!BN_lshift(a, a, d - e - 1))
114 goto err;
115
116 if (!BN_add(a, a, b))
117 goto err;
118 }
119
120 /*
121 * The loop invariant implies that either a or a - 1 is isqrt(n).
122 * Figure out which one it is. The invariant also implies that for
123 * a perfect square n, a must be the square root.
124 */
125
126 if (!BN_sqr(b, a, ctx))
127 goto err;
128
129 /* If a^2 > n, we must have isqrt(n) == a - 1. */
130 if ((cmp = BN_cmp(b, n)) > 0) {
131 if (!BN_sub_word(a, 1))
132 goto err;
133 }
134
135 perfect = cmp == 0;
136
137 done:
138 if (out_perfect != NULL)
139 *out_perfect = perfect;
140
141 if (out_sqrt != NULL) {
142 if (!bn_copy(out_sqrt, a))
143 goto err;
144 }
145
146 ret = 1;
147
148 err:
149 BN_CTX_end(ctx);
150
151 if (ctx != in_ctx)
152 BN_CTX_free(ctx);
153
154 return ret;
155}
156
157/*
158 * is_square_mod_N[r % N] indicates whether r % N has a square root modulo N.
159 * The tables are generated in regress/lib/libcrypto/bn/bn_isqrt.c.
160 */
161
162const uint8_t is_square_mod_11[] = {
163 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0,
164};
165CTASSERT(sizeof(is_square_mod_11) == 11);
166
167const uint8_t is_square_mod_63[] = {
168 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
169 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
170 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
171 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
172};
173CTASSERT(sizeof(is_square_mod_63) == 63);
174
175const uint8_t is_square_mod_64[] = {
176 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
177 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
178 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
179 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
180};
181CTASSERT(sizeof(is_square_mod_64) == 64);
182
183const uint8_t is_square_mod_65[] = {
184 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,
185 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
186 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
187 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,
188 1,
189};
190CTASSERT(sizeof(is_square_mod_65) == 65);
191
192/*
193 * Determine whether n is a perfect square or not.
194 *
195 * Returns 1 on success and 0 on error. In case of success, |*out_perfect| is
196 * set to 1 if and only if |n| is a perfect square.
197 */
198
199int
200bn_is_perfect_square(int *out_perfect, const BIGNUM *n, BN_CTX *ctx)
201{
202 BN_ULONG r;
203
204 *out_perfect = 0;
205
206 if (BN_is_negative(n))
207 return 1;
208
209 /*
210 * Before performing an expensive bn_isqrt() operation, weed out many
211 * obvious non-squares. See H. Cohen, "A course in computational
212 * algebraic number theory", Algorithm 1.7.3.
213 *
214 * The idea is that a square remains a square when reduced modulo any
215 * number. The moduli are chosen in such a way that a non-square has
216 * probability < 1% of passing the four table lookups.
217 */
218
219 /* n % 64 */
220 r = BN_lsw(n) & 0x3f;
221
222 if (!is_square_mod_64[r % 64])
223 return 1;
224
225 if ((r = BN_mod_word(n, 11 * 63 * 65)) == (BN_ULONG)-1)
226 return 0;
227
228 if (!is_square_mod_63[r % 63] ||
229 !is_square_mod_65[r % 65] ||
230 !is_square_mod_11[r % 11])
231 return 1;
232
233 return bn_isqrt(NULL, out_perfect, n, ctx);
234}
diff --git a/src/lib/libcrypto/bn/bn_kron.c b/src/lib/libcrypto/bn/bn_kron.c
deleted file mode 100644
index a170d688e9..0000000000
--- a/src/lib/libcrypto/bn/bn_kron.c
+++ /dev/null
@@ -1,195 +0,0 @@
1/* $OpenBSD: bn_kron.c,v 1.15 2023/07/08 12:21:58 beck Exp $ */
2/* ====================================================================
3 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56#include "bn_local.h"
57
58/*
59 * Kronecker symbol, implemented according to Henri Cohen, "A Course in
60 * Computational Algebraic Number Theory", Algorithm 1.4.10.
61 *
62 * Returns -1, 0, or 1 on success and -2 on error.
63 */
64
65int
66BN_kronecker(const BIGNUM *A, const BIGNUM *B, BN_CTX *ctx)
67{
68 /* tab[BN_lsw(n) & 7] = (-1)^((n^2 - 1)) / 8) for odd values of n. */
69 static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1};
70 BIGNUM *a, *b, *tmp;
71 int k, v;
72 int ret = -2;
73
74 BN_CTX_start(ctx);
75
76 if ((a = BN_CTX_get(ctx)) == NULL)
77 goto end;
78 if ((b = BN_CTX_get(ctx)) == NULL)
79 goto end;
80
81 if (!bn_copy(a, A))
82 goto end;
83 if (!bn_copy(b, B))
84 goto end;
85
86 /*
87 * Cohen's step 1:
88 */
89
90 /* If b is zero, output 1 if |a| is 1, otherwise output 0. */
91 if (BN_is_zero(b)) {
92 ret = BN_abs_is_word(a, 1);
93 goto end;
94 }
95
96 /*
97 * Cohen's step 2:
98 */
99
100 /* If both are even, they have a factor in common, so output 0. */
101 if (!BN_is_odd(a) && !BN_is_odd(b)) {
102 ret = 0;
103 goto end;
104 }
105
106 /* Factorize b = 2^v * u with odd u and replace b with u. */
107 v = 0;
108 while (!BN_is_bit_set(b, v))
109 v++;
110 if (!BN_rshift(b, b, v))
111 goto end;
112
113 /* If v is even set k = 1, otherwise set it to (-1)^((a^2 - 1) / 8). */
114 k = 1;
115 if (v % 2 != 0)
116 k = tab[BN_lsw(a) & 7];
117
118 /*
119 * If b is negative, replace it with -b and if a is also negative
120 * replace k with -k.
121 */
122 if (BN_is_negative(b)) {
123 BN_set_negative(b, 0);
124
125 if (BN_is_negative(a))
126 k = -k;
127 }
128
129 /*
130 * Now b is positive and odd, so compute the Jacobi symbol (a/b)
131 * and multiply it by k.
132 */
133
134 while (1) {
135 /*
136 * Cohen's step 3:
137 */
138
139 /* b is positive and odd. */
140
141 /* If a is zero output k if b is one, otherwise output 0. */
142 if (BN_is_zero(a)) {
143 ret = BN_is_one(b) ? k : 0;
144 goto end;
145 }
146
147 /* Factorize a = 2^v * u with odd u and replace a with u. */
148 v = 0;
149 while (!BN_is_bit_set(a, v))
150 v++;
151 if (!BN_rshift(a, a, v))
152 goto end;
153
154 /* If v is odd, multiply k with (-1)^((b^2 - 1) / 8). */
155 if (v % 2 != 0)
156 k *= tab[BN_lsw(b) & 7];
157
158 /*
159 * Cohen's step 4:
160 */
161
162 /*
163 * Apply the reciprocity law: multiply k by (-1)^((a-1)(b-1)/4).
164 *
165 * This expression is -1 if and only if a and b are 3 (mod 4).
166 * In turn, this is the case if and only if their two's
167 * complement representations have the second bit set.
168 * a could be negative in the first iteration, b is positive.
169 */
170 if ((BN_is_negative(a) ? ~BN_lsw(a) : BN_lsw(a)) & BN_lsw(b) & 2)
171 k = -k;
172
173 /*
174 * (a, b) := (b mod |a|, |a|)
175 *
176 * Once this is done, we know that 0 < a < b at the start of the
177 * loop. Since b is strictly decreasing, the loop terminates.
178 */
179
180 if (!BN_nnmod(b, b, a, ctx))
181 goto end;
182
183 tmp = a;
184 a = b;
185 b = tmp;
186
187 BN_set_negative(b, 0);
188 }
189
190 end:
191 BN_CTX_end(ctx);
192
193 return ret;
194}
195LCRYPTO_ALIAS(BN_kronecker);
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
deleted file mode 100644
index 72b988650c..0000000000
--- a/src/lib/libcrypto/bn/bn_lib.c
+++ /dev/null
@@ -1,752 +0,0 @@
1/* $OpenBSD: bn_lib.c,v 1.93 2024/04/16 13:07:14 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <assert.h>
60#include <limits.h>
61#include <stdio.h>
62#include <string.h>
63
64#include <openssl/opensslconf.h>
65
66#include <openssl/err.h>
67
68#include "bn_local.h"
69#include "bn_internal.h"
70
71BIGNUM *
72BN_new(void)
73{
74 BIGNUM *bn;
75
76 if ((bn = calloc(1, sizeof(BIGNUM))) == NULL) {
77 BNerror(ERR_R_MALLOC_FAILURE);
78 return NULL;
79 }
80 bn->flags = BN_FLG_MALLOCED;
81
82 return bn;
83}
84LCRYPTO_ALIAS(BN_new);
85
86void
87BN_init(BIGNUM *a)
88{
89 memset(a, 0, sizeof(BIGNUM));
90}
91
92void
93BN_clear(BIGNUM *a)
94{
95 if (a->d != NULL)
96 explicit_bzero(a->d, a->dmax * sizeof(a->d[0]));
97 a->top = 0;
98 a->neg = 0;
99}
100LCRYPTO_ALIAS(BN_clear);
101
102void
103BN_free(BIGNUM *bn)
104{
105 if (bn == NULL)
106 return;
107
108 if (!BN_get_flags(bn, BN_FLG_STATIC_DATA))
109 freezero(bn->d, bn->dmax * sizeof(bn->d[0]));
110
111 if (!BN_get_flags(bn, BN_FLG_MALLOCED)) {
112 explicit_bzero(bn, sizeof(*bn));
113 return;
114 }
115
116 freezero(bn, sizeof(*bn));
117}
118LCRYPTO_ALIAS(BN_free);
119
120void
121BN_clear_free(BIGNUM *bn)
122{
123 BN_free(bn);
124}
125LCRYPTO_ALIAS(BN_clear_free);
126
127void
128BN_set_flags(BIGNUM *b, int n)
129{
130 b->flags |= n;
131}
132LCRYPTO_ALIAS(BN_set_flags);
133
134int
135BN_get_flags(const BIGNUM *b, int n)
136{
137 return b->flags & n;
138}
139LCRYPTO_ALIAS(BN_get_flags);
140
141void
142BN_with_flags(BIGNUM *dest, const BIGNUM *b, int flags)
143{
144 int dest_flags;
145
146 dest_flags = (dest->flags & BN_FLG_MALLOCED) |
147 (b->flags & ~BN_FLG_MALLOCED) | BN_FLG_STATIC_DATA | flags;
148
149 *dest = *b;
150 dest->flags = dest_flags;
151}
152LCRYPTO_ALIAS(BN_with_flags);
153
154static const BN_ULONG bn_value_one_data = 1;
155static const BIGNUM bn_value_one = {
156 .d = (BN_ULONG *)&bn_value_one_data,
157 .top = 1,
158 .dmax = 1,
159 .neg = 0,
160 .flags = BN_FLG_STATIC_DATA,
161};
162
163const BIGNUM *
164BN_value_one(void)
165{
166 return &bn_value_one;
167}
168LCRYPTO_ALIAS(BN_value_one);
169
170int
171BN_num_bits_word(BN_ULONG w)
172{
173 return BN_BITS2 - bn_clzw(w);
174}
175LCRYPTO_ALIAS(BN_num_bits_word);
176
177int
178BN_num_bits(const BIGNUM *bn)
179{
180 return bn_bitsize(bn);
181}
182LCRYPTO_ALIAS(BN_num_bits);
183
184void
185bn_correct_top(BIGNUM *a)
186{
187 while (a->top > 0 && a->d[a->top - 1] == 0)
188 a->top--;
189}
190
191static int
192bn_expand_internal(BIGNUM *bn, int words)
193{
194 BN_ULONG *d;
195
196 if (words < 0) {
197 BNerror(BN_R_BIGNUM_TOO_LONG); // XXX
198 return 0;
199 }
200
201 if (words > INT_MAX / (4 * BN_BITS2)) {
202 BNerror(BN_R_BIGNUM_TOO_LONG);
203 return 0;
204 }
205 if (BN_get_flags(bn, BN_FLG_STATIC_DATA)) {
206 BNerror(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
207 return 0;
208 }
209
210 d = recallocarray(bn->d, bn->dmax, words, sizeof(BN_ULONG));
211 if (d == NULL) {
212 BNerror(ERR_R_MALLOC_FAILURE);
213 return 0;
214 }
215 bn->d = d;
216 bn->dmax = words;
217
218 return 1;
219}
220
221int
222bn_expand_bits(BIGNUM *bn, size_t bits)
223{
224 int words;
225
226 if (bits > (INT_MAX - BN_BITS2 + 1))
227 return 0;
228
229 words = (bits + BN_BITS2 - 1) / BN_BITS2;
230
231 return bn_wexpand(bn, words);
232}
233
234int
235bn_expand_bytes(BIGNUM *bn, size_t bytes)
236{
237 int words;
238
239 if (bytes > (INT_MAX - BN_BYTES + 1))
240 return 0;
241
242 words = (bytes + BN_BYTES - 1) / BN_BYTES;
243
244 return bn_wexpand(bn, words);
245}
246
247int
248bn_wexpand(BIGNUM *bn, int words)
249{
250 if (words < 0)
251 return 0;
252
253 if (words <= bn->dmax)
254 return 1;
255
256 return bn_expand_internal(bn, words);
257}
258
259BIGNUM *
260BN_dup(const BIGNUM *a)
261{
262 BIGNUM *t;
263
264 if (a == NULL)
265 return NULL;
266
267 t = BN_new();
268 if (t == NULL)
269 return NULL;
270 if (!bn_copy(t, a)) {
271 BN_free(t);
272 return NULL;
273 }
274 return t;
275}
276LCRYPTO_ALIAS(BN_dup);
277
278static inline void
279bn_copy_words(BN_ULONG *ap, const BN_ULONG *bp, int n)
280{
281 while (n > 0) {
282 ap[0] = bp[0];
283 ap++;
284 bp++;
285 n--;
286 }
287}
288
289BIGNUM *
290BN_copy(BIGNUM *a, const BIGNUM *b)
291{
292 if (a == b)
293 return (a);
294
295 if (!bn_wexpand(a, b->top))
296 return (NULL);
297
298 bn_copy_words(a->d, b->d, b->top);
299
300 /* Copy constant time flag from b, but make it sticky on a. */
301 a->flags |= b->flags & BN_FLG_CONSTTIME;
302
303 a->top = b->top;
304 a->neg = b->neg;
305
306 return (a);
307}
308LCRYPTO_ALIAS(BN_copy);
309
310int
311bn_copy(BIGNUM *dst, const BIGNUM *src)
312{
313 return BN_copy(dst, src) != NULL;
314}
315
316void
317BN_swap(BIGNUM *a, BIGNUM *b)
318{
319 int flags_old_a, flags_old_b;
320 BN_ULONG *tmp_d;
321 int tmp_top, tmp_dmax, tmp_neg;
322
323
324 flags_old_a = a->flags;
325 flags_old_b = b->flags;
326
327 tmp_d = a->d;
328 tmp_top = a->top;
329 tmp_dmax = a->dmax;
330 tmp_neg = a->neg;
331
332 a->d = b->d;
333 a->top = b->top;
334 a->dmax = b->dmax;
335 a->neg = b->neg;
336
337 b->d = tmp_d;
338 b->top = tmp_top;
339 b->dmax = tmp_dmax;
340 b->neg = tmp_neg;
341
342 a->flags = (flags_old_a & BN_FLG_MALLOCED) |
343 (flags_old_b & BN_FLG_STATIC_DATA);
344 b->flags = (flags_old_b & BN_FLG_MALLOCED) |
345 (flags_old_a & BN_FLG_STATIC_DATA);
346}
347LCRYPTO_ALIAS(BN_swap);
348
349BN_ULONG
350BN_get_word(const BIGNUM *a)
351{
352 if (a->top > 1)
353 return BN_MASK2;
354 else if (a->top == 1)
355 return a->d[0];
356 /* a->top == 0 */
357 return 0;
358}
359LCRYPTO_ALIAS(BN_get_word);
360
361int
362BN_set_word(BIGNUM *a, BN_ULONG w)
363{
364 if (!bn_wexpand(a, 1))
365 return (0);
366 a->neg = 0;
367 a->d[0] = w;
368 a->top = (w ? 1 : 0);
369 return (1);
370}
371LCRYPTO_ALIAS(BN_set_word);
372
373int
374BN_ucmp(const BIGNUM *a, const BIGNUM *b)
375{
376 int i;
377
378 if (a->top < b->top)
379 return -1;
380 if (a->top > b->top)
381 return 1;
382
383 for (i = a->top - 1; i >= 0; i--) {
384 if (a->d[i] != b->d[i])
385 return (a->d[i] > b->d[i] ? 1 : -1);
386 }
387
388 return 0;
389}
390LCRYPTO_ALIAS(BN_ucmp);
391
392int
393BN_cmp(const BIGNUM *a, const BIGNUM *b)
394{
395 if (a == NULL || b == NULL) {
396 if (a != NULL)
397 return -1;
398 if (b != NULL)
399 return 1;
400 return 0;
401 }
402
403 if (a->neg != b->neg)
404 return b->neg - a->neg;
405
406 if (a->neg)
407 return BN_ucmp(b, a);
408
409 return BN_ucmp(a, b);
410}
411LCRYPTO_ALIAS(BN_cmp);
412
413int
414BN_set_bit(BIGNUM *a, int n)
415{
416 int i, j, k;
417
418 if (n < 0)
419 return 0;
420
421 i = n / BN_BITS2;
422 j = n % BN_BITS2;
423 if (a->top <= i) {
424 if (!bn_wexpand(a, i + 1))
425 return (0);
426 for (k = a->top; k < i + 1; k++)
427 a->d[k] = 0;
428 a->top = i + 1;
429 }
430
431 a->d[i] |= (((BN_ULONG)1) << j);
432 return (1);
433}
434LCRYPTO_ALIAS(BN_set_bit);
435
436int
437BN_clear_bit(BIGNUM *a, int n)
438{
439 int i, j;
440
441 if (n < 0)
442 return 0;
443
444 i = n / BN_BITS2;
445 j = n % BN_BITS2;
446 if (a->top <= i)
447 return (0);
448
449 a->d[i] &= (~(((BN_ULONG)1) << j));
450 bn_correct_top(a);
451
452 BN_set_negative(a, a->neg);
453
454 return (1);
455}
456LCRYPTO_ALIAS(BN_clear_bit);
457
458int
459BN_is_bit_set(const BIGNUM *a, int n)
460{
461 int i, j;
462
463 if (n < 0)
464 return 0;
465 i = n / BN_BITS2;
466 j = n % BN_BITS2;
467 if (a->top <= i)
468 return 0;
469 return (int)(((a->d[i]) >> j) & ((BN_ULONG)1));
470}
471LCRYPTO_ALIAS(BN_is_bit_set);
472
473int
474BN_mask_bits(BIGNUM *a, int n)
475{
476 int b, w;
477
478 if (n < 0)
479 return 0;
480
481 w = n / BN_BITS2;
482 b = n % BN_BITS2;
483 if (w >= a->top)
484 return 0;
485 if (b == 0)
486 a->top = w;
487 else {
488 a->top = w + 1;
489 a->d[w] &= ~(BN_MASK2 << b);
490 }
491 bn_correct_top(a);
492
493 BN_set_negative(a, a->neg);
494
495 return (1);
496}
497LCRYPTO_ALIAS(BN_mask_bits);
498
499void
500BN_set_negative(BIGNUM *bn, int neg)
501{
502 bn->neg = ~BN_is_zero(bn) & bn_ct_ne_zero(neg);
503}
504LCRYPTO_ALIAS(BN_set_negative);
505
506/*
507 * Constant-time conditional swap of a and b.
508 * a and b are swapped if condition is not 0.
509 * The code assumes that at most one bit of condition is set.
510 * nwords is the number of words to swap.
511 * The code assumes that at least nwords are allocated in both a and b,
512 * and that no more than nwords are used by either a or b.
513 * a and b cannot be the same number
514 */
515void
516BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords)
517{
518 BN_ULONG t;
519 int i;
520
521 assert(a != b);
522 assert((condition & (condition - 1)) == 0);
523 assert(sizeof(BN_ULONG) >= sizeof(int));
524
525 condition = ((condition - 1) >> (BN_BITS2 - 1)) - 1;
526
527 t = (a->top^b->top) & condition;
528 a->top ^= t;
529 b->top ^= t;
530
531#define BN_CONSTTIME_SWAP(ind) \
532 do { \
533 t = (a->d[ind] ^ b->d[ind]) & condition; \
534 a->d[ind] ^= t; \
535 b->d[ind] ^= t; \
536 } while (0)
537
538
539 switch (nwords) {
540 default:
541 for (i = 10; i < nwords; i++)
542 BN_CONSTTIME_SWAP(i);
543 /* Fallthrough */
544 case 10: BN_CONSTTIME_SWAP(9); /* Fallthrough */
545 case 9: BN_CONSTTIME_SWAP(8); /* Fallthrough */
546 case 8: BN_CONSTTIME_SWAP(7); /* Fallthrough */
547 case 7: BN_CONSTTIME_SWAP(6); /* Fallthrough */
548 case 6: BN_CONSTTIME_SWAP(5); /* Fallthrough */
549 case 5: BN_CONSTTIME_SWAP(4); /* Fallthrough */
550 case 4: BN_CONSTTIME_SWAP(3); /* Fallthrough */
551 case 3: BN_CONSTTIME_SWAP(2); /* Fallthrough */
552 case 2: BN_CONSTTIME_SWAP(1); /* Fallthrough */
553 case 1:
554 BN_CONSTTIME_SWAP(0);
555 }
556#undef BN_CONSTTIME_SWAP
557}
558LCRYPTO_ALIAS(BN_consttime_swap);
559
560/*
561 * Constant-time conditional swap of a and b.
562 * a and b are swapped if condition is not 0.
563 * nwords is the number of words to swap.
564 */
565int
566BN_swap_ct(BN_ULONG condition, BIGNUM *a, BIGNUM *b, size_t nwords)
567{
568 BN_ULONG t;
569 int i, words;
570
571 if (a == b)
572 return 1;
573 if (nwords > INT_MAX)
574 return 0;
575 words = (int)nwords;
576 if (!bn_wexpand(a, words) || !bn_wexpand(b, words))
577 return 0;
578 if (a->top > words || b->top > words) {
579 BNerror(BN_R_INVALID_LENGTH);
580 return 0;
581 }
582
583 /* Set condition to 0 (if it was zero) or all 1s otherwise. */
584 condition = ((~condition & (condition - 1)) >> (BN_BITS2 - 1)) - 1;
585
586 /* swap top field */
587 t = (a->top ^ b->top) & condition;
588 a->top ^= t;
589 b->top ^= t;
590
591 /* swap neg field */
592 t = (a->neg ^ b->neg) & condition;
593 a->neg ^= t;
594 b->neg ^= t;
595
596 /* swap BN_FLG_CONSTTIME from flag field */
597 t = ((a->flags ^ b->flags) & BN_FLG_CONSTTIME) & condition;
598 a->flags ^= t;
599 b->flags ^= t;
600
601 /* swap the data */
602 for (i = 0; i < words; i++) {
603 t = (a->d[i] ^ b->d[i]) & condition;
604 a->d[i] ^= t;
605 b->d[i] ^= t;
606 }
607
608 return 1;
609}
610
611void
612BN_zero(BIGNUM *a)
613{
614 a->neg = 0;
615 a->top = 0;
616}
617LCRYPTO_ALIAS(BN_zero);
618
619int
620BN_one(BIGNUM *a)
621{
622 return BN_set_word(a, 1);
623}
624LCRYPTO_ALIAS(BN_one);
625
626int
627BN_abs_is_word(const BIGNUM *a, const BN_ULONG w)
628{
629 return (a->top == 1 && a->d[0] == w) || (w == 0 && a->top == 0);
630}
631LCRYPTO_ALIAS(BN_abs_is_word);
632
633int
634BN_is_zero(const BIGNUM *bn)
635{
636 BN_ULONG bits = 0;
637 int i;
638
639 for (i = 0; i < bn->top; i++)
640 bits |= bn->d[i];
641
642 return bits == 0;
643}
644LCRYPTO_ALIAS(BN_is_zero);
645
646int
647BN_is_one(const BIGNUM *a)
648{
649 return BN_abs_is_word(a, 1) && !a->neg;
650}
651LCRYPTO_ALIAS(BN_is_one);
652
653int
654BN_is_word(const BIGNUM *a, const BN_ULONG w)
655{
656 return BN_abs_is_word(a, w) && (w == 0 || !a->neg);
657}
658LCRYPTO_ALIAS(BN_is_word);
659
660int
661BN_is_odd(const BIGNUM *a)
662{
663 return a->top > 0 && (a->d[0] & 1);
664}
665LCRYPTO_ALIAS(BN_is_odd);
666
667int
668BN_is_negative(const BIGNUM *a)
669{
670 return a->neg != 0;
671}
672LCRYPTO_ALIAS(BN_is_negative);
673
/*
 * Bits of security, see SP800-57, section 5.6.11, table 2.
 *
 * L is mapped to a strength through the threshold table below; an L under
 * 1024 yields 0. With N == -1 that strength is returned directly.
 * Otherwise N / 2 is computed: values under 80 yield 0, and the result is
 * the smaller of N / 2 and the strength derived from L.
 */
int
BN_security_bits(int L, int N)
{
	static const struct {
		int min_l;
		int strength;
	} thresholds[] = {
		{ 15360, 256 },
		{ 7680, 192 },
		{ 3072, 128 },
		{ 2048, 112 },
		{ 1024, 80 },
	};
	int secbits = 0;
	int half;
	size_t i;

	for (i = 0; i < sizeof(thresholds) / sizeof(thresholds[0]); i++) {
		if (L >= thresholds[i].min_l) {
			secbits = thresholds[i].strength;
			break;
		}
	}
	if (secbits == 0)
		return 0;

	if (N == -1)
		return secbits;

	half = N / 2;
	if (half < 80)
		return 0;

	return half < secbits ? half : secbits;
}
704LCRYPTO_ALIAS(BN_security_bits);
705
706BN_GENCB *
707BN_GENCB_new(void)
708{
709 BN_GENCB *cb;
710
711 if ((cb = calloc(1, sizeof(*cb))) == NULL)
712 return NULL;
713
714 return cb;
715}
716LCRYPTO_ALIAS(BN_GENCB_new);
717
718void
719BN_GENCB_free(BN_GENCB *cb)
720{
721 if (cb == NULL)
722 return;
723 free(cb);
724}
725LCRYPTO_ALIAS(BN_GENCB_free);
726
727/* Populate a BN_GENCB structure with an "old"-style callback */
728void
729BN_GENCB_set_old(BN_GENCB *gencb, void (*cb)(int, int, void *), void *cb_arg)
730{
731 gencb->ver = 1;
732 gencb->cb.cb_1 = cb;
733 gencb->arg = cb_arg;
734}
735LCRYPTO_ALIAS(BN_GENCB_set_old);
736
737/* Populate a BN_GENCB structure with a "new"-style callback */
738void
739BN_GENCB_set(BN_GENCB *gencb, int (*cb)(int, int, BN_GENCB *), void *cb_arg)
740{
741 gencb->ver = 2;
742 gencb->cb.cb_2 = cb;
743 gencb->arg = cb_arg;
744}
745LCRYPTO_ALIAS(BN_GENCB_set);
746
747void *
748BN_GENCB_get_arg(BN_GENCB *cb)
749{
750 return cb->arg;
751}
752LCRYPTO_ALIAS(BN_GENCB_get_arg);
diff --git a/src/lib/libcrypto/bn/bn_local.h b/src/lib/libcrypto/bn/bn_local.h
deleted file mode 100644
index 067ffab3d9..0000000000
--- a/src/lib/libcrypto/bn/bn_local.h
+++ /dev/null
@@ -1,335 +0,0 @@
1/* $OpenBSD: bn_local.h,v 1.50 2025/02/13 11:04:20 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#ifndef HEADER_BN_LOCAL_H
113#define HEADER_BN_LOCAL_H
114
115#include <openssl/opensslconf.h>
116
117#include <openssl/bn.h>
118
119__BEGIN_HIDDEN_DECLS
120
/*
 * Arbitrary-precision integer stored as a sign plus an array of BN_ULONG
 * words. d[0] is the least significant word (see BN_lsw()).
 */
struct bignum_st {
	BN_ULONG *d;	/* Pointer to an array of 'BN_BITS2' bit chunks. */
	int top;	/* Index of last used d +1. */
	/* The next are internal book keeping for bn_expand. */
	int dmax;	/* Size of the d array. */
	int neg;	/* one if the number is negative */
	int flags;	/* BN_FLG_* bits (e.g. MALLOCED, STATIC_DATA, CONSTTIME) */
};
129
/* Precomputed values kept for a modulus N when working in Montgomery form. */
struct bn_mont_ctx_st {
	int ri;		/* Number of bits in R */
	BIGNUM RR;	/* Used to convert to Montgomery form */
	BIGNUM N;	/* Modulus */

	/* Least significant word(s) of Ni; R*(1/R mod N) - N*Ni = 1 */
	BN_ULONG n0[2];

	int flags;	/* NOTE(review): meaning not visible here - confirm in bn_mont.c */
};
140
141typedef struct bn_recp_ctx_st BN_RECP_CTX;
142
/*
 * Used for slow "generation" functions.
 *
 * ver selects which member of the cb union is valid: BN_GENCB_set_old()
 * stores 1 together with cb_1, BN_GENCB_set() stores 2 together with cb_2.
 */
struct bn_gencb_st {
	unsigned int ver;	/* To handle binary (in)compatibility */
	void *arg;		/* callback-specific data */
	union {
		/* if(ver==1) - handles old style callbacks */
		void (*cb_1)(int, int, void *);
		/* if(ver==2) - new callback style */
		int (*cb_2)(int, int, BN_GENCB *);
	} cb;
};
154
155/*
156 * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
157 *
158 *
159 * For window size 'w' (w >= 2) and a random 'b' bits exponent,
160 * the number of multiplications is a constant plus on average
161 *
162 * 2^(w-1) + (b-w)/(w+1);
163 *
164 * here 2^(w-1) is for precomputing the table (we actually need
165 * entries only for windows that have the lowest bit set), and
166 * (b-w)/(w+1) is an approximation for the expected number of
167 * w-bit windows, not counting the first one.
168 *
169 * Thus we should use
170 *
171 * w >= 6 if b > 671
172 * w = 5 if 671 > b > 239
173 * w = 4 if 239 > b > 79
174 * w = 3 if 79 > b > 23
175 * w <= 2 if 23 > b
176 *
177 * (with draws in between). Very small exponents are often selected
178 * with low Hamming weight, so we use w = 1 for b <= 23.
179 */
/*
 * Note: the argument is expanded up to four times, so it must be an
 * expression without side effects.
 */
#define BN_window_bits_for_exponent_size(b) \
		((b) > 671 ? 6 : \
		 (b) > 239 ? 5 : \
		 (b) >  79 ? 4 : \
		 (b) >  23 ? 3 : 1)
185
186
187/* BN_mod_exp_mont_consttime is based on the assumption that the
188 * L1 data cache line width of the target processor is at least
189 * the following value.
190 */
191#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 )
192#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
193
194/* Window sizes optimized for fixed window size modular exponentiation
195 * algorithm (BN_mod_exp_mont_consttime).
196 *
197 * To achieve the security goals of BN_mod_exp_mont_consttime, the
198 * maximum size of the window must not exceed
199 * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
200 *
201 * Window size thresholds are defined for cache line sizes of 32 and 64,
202 * cache line sizes where log_2(32)=5 and log_2(64)=6 respectively. A
203 * window size of 7 should only be used on processors that have a 128
204 * byte or greater cache line size.
205 */
/*
 * Pick the window-size table that matches the assumed cache line width.
 * Only widths of 64 and 32 are handled; there is no #else branch, so for
 * any other width neither macro below is defined.
 * The argument of BN_window_bits_for_ctime_exponent_size() is expanded
 * several times and must not have side effects.
 */
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64

# define BN_window_bits_for_ctime_exponent_size(b) \
		((b) > 937 ? 6 : \
		 (b) > 306 ? 5 : \
		 (b) >  89 ? 4 : \
		 (b) >  22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE	(6)

#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32

# define BN_window_bits_for_ctime_exponent_size(b) \
		((b) > 306 ? 5 : \
		 (b) >  89 ? 4 : \
		 (b) >  22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE	(5)

#endif
224
225
226/* Pentium pro 16,16,16,32,64 */
227/* Alpha 16,16,16,16,64 */
228#define BN_MULL_SIZE_NORMAL (16) /* 32 */
229#define BN_MUL_RECURSIVE_SIZE_NORMAL (16) /* 32 less than */
230#define BN_SQR_RECURSIVE_SIZE_NORMAL (16) /* 32 */
231#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */
232#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */
233
/* Least significant word of a BIGNUM, or 0 when no words are in use. */
#define BN_lsw(n) ((n)->top != 0 ? (n)->d[0] : (BN_ULONG)0)
236
237BN_ULONG bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len,
238 const BN_ULONG *b, int b_len);
239BN_ULONG bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len,
240 const BN_ULONG *b, int b_len);
241
242void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
243void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
244void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
245
246void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
247void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
248
249int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
250 const BN_ULONG *np, const BN_ULONG *n0, int num);
251
252void bn_correct_top(BIGNUM *a);
253int bn_expand_bits(BIGNUM *a, size_t bits);
254int bn_expand_bytes(BIGNUM *a, size_t bytes);
255int bn_wexpand(BIGNUM *a, int words);
256
257BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
258 int num);
259BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
260 int num);
261BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
262BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
263void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
264BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
265void bn_div_rem_words(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q,
266 BN_ULONG *out_r);
267
268int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom);
269int bn_rand_in_range(BIGNUM *rnd, const BIGNUM *lower_inc, const BIGNUM *upper_exc);
270int bn_rand_interval(BIGNUM *rnd, BN_ULONG lower_word, const BIGNUM *upper_exc);
271
272void BN_init(BIGNUM *);
273
274BN_MONT_CTX *BN_MONT_CTX_create(const BIGNUM *bn, BN_CTX *ctx);
275
276BN_RECP_CTX *BN_RECP_CTX_create(const BIGNUM *N);
277void BN_RECP_CTX_free(BN_RECP_CTX *recp);
278int BN_div_reciprocal(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
279 BN_RECP_CTX *recp, BN_CTX *ctx);
280int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
281 BN_RECP_CTX *recp, BN_CTX *ctx);
282int BN_mod_sqr_reciprocal(BIGNUM *r, const BIGNUM *x, BN_RECP_CTX *recp,
283 BN_CTX *ctx);
284int BN_mod_exp_reciprocal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
285 const BIGNUM *m, BN_CTX *ctx);
286
287/* Explicitly const time / non-const time versions for internal use */
288int BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
289 const BIGNUM *m, BN_CTX *ctx);
290int BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
291 const BIGNUM *m, BN_CTX *ctx);
292int BN_mod_exp_mont_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
293 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
294int BN_mod_exp_mont_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
295 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
296int BN_div_nonct(BIGNUM *q, BIGNUM *r, const BIGNUM *n, const BIGNUM *d,
297 BN_CTX *ctx);
298int BN_div_ct(BIGNUM *q, BIGNUM *r, const BIGNUM *n, const BIGNUM *d,
299 BN_CTX *ctx);
300int BN_mod_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
301int BN_mod_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
302
303int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p,
304 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
305int BN_mod_exp2_mont(BIGNUM *r, const BIGNUM *a1, const BIGNUM *p1,
306 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
307 BN_CTX *ctx, BN_MONT_CTX *m_ctx);
308
309int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
310 const BIGNUM *m, BN_CTX *ctx);
311
312BIGNUM *BN_mod_inverse_ct(BIGNUM *ret, const BIGNUM *a, const BIGNUM *n,
313 BN_CTX *ctx);
314BIGNUM *BN_mod_inverse_nonct(BIGNUM *ret, const BIGNUM *a, const BIGNUM *n,
315 BN_CTX *ctx);
316int BN_gcd_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
317
318int BN_swap_ct(BN_ULONG swap, BIGNUM *a, BIGNUM *b, size_t nwords);
319
320int bn_copy(BIGNUM *dst, const BIGNUM *src);
321
322int bn_isqrt(BIGNUM *out_sqrt, int *out_perfect, const BIGNUM *n, BN_CTX *ctx);
323int bn_is_perfect_square(int *out_perfect, const BIGNUM *n, BN_CTX *ctx);
324
325int bn_is_prime_bpsw(int *is_prime, const BIGNUM *n, BN_CTX *ctx, size_t rounds);
326
327int bn_printf(BIO *bio, const BIGNUM *bn, int indent, const char *fmt, ...)
328 __attribute__((__format__ (printf, 4, 5)))
329 __attribute__((__nonnull__ (4)));
330
331int bn_bn2hex_nosign(const BIGNUM *bn, char **out, size_t *out_len);
332int bn_bn2hex_nibbles(const BIGNUM *bn, char **out, size_t *out_len);
333
334__END_HIDDEN_DECLS
335#endif /* !HEADER_BN_LOCAL_H */
diff --git a/src/lib/libcrypto/bn/bn_mod.c b/src/lib/libcrypto/bn/bn_mod.c
deleted file mode 100644
index 365f6fcf03..0000000000
--- a/src/lib/libcrypto/bn/bn_mod.c
+++ /dev/null
@@ -1,369 +0,0 @@
1/* $OpenBSD: bn_mod.c,v 1.22 2023/07/08 12:21:58 beck Exp $ */
2/* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
58 * All rights reserved.
59 *
60 * This package is an SSL implementation written
61 * by Eric Young (eay@cryptsoft.com).
62 * The implementation was written so as to conform with Netscapes SSL.
63 *
64 * This library is free for commercial and non-commercial use as long as
65 * the following conditions are aheared to. The following conditions
66 * apply to all code found in this distribution, be it the RC4, RSA,
67 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
68 * included with this distribution is covered by the same copyright terms
69 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
70 *
71 * Copyright remains Eric Young's, and as such any Copyright notices in
72 * the code are not to be removed.
73 * If this package is used in a product, Eric Young should be given attribution
74 * as the author of the parts of the library used.
75 * This can be in the form of a textual message at program startup or
76 * in documentation (online or textual) provided with the package.
77 *
78 * Redistribution and use in source and binary forms, with or without
79 * modification, are permitted provided that the following conditions
80 * are met:
81 * 1. Redistributions of source code must retain the copyright
82 * notice, this list of conditions and the following disclaimer.
83 * 2. Redistributions in binary form must reproduce the above copyright
84 * notice, this list of conditions and the following disclaimer in the
85 * documentation and/or other materials provided with the distribution.
86 * 3. All advertising materials mentioning features or use of this software
87 * must display the following acknowledgement:
88 * "This product includes cryptographic software written by
89 * Eric Young (eay@cryptsoft.com)"
90 * The word 'cryptographic' can be left out if the rouines from the library
91 * being used are not cryptographic related :-).
92 * 4. If you include any Windows specific code (or a derivative thereof) from
93 * the apps directory (application code) you must include an acknowledgement:
94 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
95 *
96 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
97 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
98 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
99 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
100 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
101 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
102 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
103 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
104 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
105 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
106 * SUCH DAMAGE.
107 *
108 * The licence and distribution terms for any publically available version or
109 * derivative of this code cannot be changed. i.e. this code cannot simply be
110 * copied and put under another distribution licence
111 * [including the GNU Public Licence.]
112 */
113
114#include <openssl/err.h>
115
116#include "bn_local.h"
117
118int
119BN_mod_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
120{
121 return BN_div_ct(NULL, r, a, m, ctx);
122}
123
124int
125BN_mod_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
126{
127 return BN_div_nonct(NULL, r, a, m, ctx);
128}
129
130/*
131 * BN_nnmod() is like BN_mod(), but always returns a non-negative remainder
132 * (that is 0 <= r < |m| always holds). If both a and m have the same sign then
133 * the result is already non-negative. Otherwise, -|m| < r < 0, which needs to
134 * be adjusted as r := r + |m|. This equates to r := |m| - |r|.
135 */
136int
137BN_nnmod(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
138{
139 if (r == m) {
140 BNerror(BN_R_INVALID_ARGUMENT);
141 return 0;
142 }
143 if (!BN_mod_ct(r, a, m, ctx))
144 return 0;
145 if (BN_is_negative(r))
146 return BN_usub(r, m, r);
147 return 1;
148}
149LCRYPTO_ALIAS(BN_nnmod);
150
151int
152BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
153 BN_CTX *ctx)
154{
155 if (r == m) {
156 BNerror(BN_R_INVALID_ARGUMENT);
157 return 0;
158 }
159 if (!BN_add(r, a, b))
160 return 0;
161 return BN_nnmod(r, r, m, ctx);
162}
163LCRYPTO_ALIAS(BN_mod_add);
164
165/*
166 * BN_mod_add() variant that may only be used if both a and b are non-negative
167 * and have already been reduced (less than m).
168 */
169int
170BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
171{
172 if (r == m) {
173 BNerror(BN_R_INVALID_ARGUMENT);
174 return 0;
175 }
176 if (!BN_uadd(r, a, b))
177 return 0;
178 if (BN_ucmp(r, m) >= 0)
179 return BN_usub(r, r, m);
180 return 1;
181}
182LCRYPTO_ALIAS(BN_mod_add_quick);
183
184int
185BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
186 BN_CTX *ctx)
187{
188 if (r == m) {
189 BNerror(BN_R_INVALID_ARGUMENT);
190 return 0;
191 }
192 if (!BN_sub(r, a, b))
193 return 0;
194 return BN_nnmod(r, r, m, ctx);
195}
196LCRYPTO_ALIAS(BN_mod_sub);
197
198/*
199 * BN_mod_sub() variant that may only be used if both a and b are non-negative
200 * and have already been reduced (less than m).
201 */
202int
203BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
204{
205 if (r == m) {
206 BNerror(BN_R_INVALID_ARGUMENT);
207 return 0;
208 }
209 if (BN_ucmp(a, b) >= 0)
210 return BN_usub(r, a, b);
211 if (!BN_usub(r, b, a))
212 return 0;
213 return BN_usub(r, m, r);
214}
215LCRYPTO_ALIAS(BN_mod_sub_quick);
216
217int
218BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
219 BN_CTX *ctx)
220{
221 BIGNUM *rr;
222 int ret = 0;
223
224 BN_CTX_start(ctx);
225
226 if (r == m) {
227 BNerror(BN_R_INVALID_ARGUMENT);
228 goto err;
229 }
230
231 rr = r;
232 if (rr == a || rr == b)
233 rr = BN_CTX_get(ctx);
234 if (rr == NULL)
235 goto err;
236
237 if (a == b) {
238 if (!BN_sqr(rr, a, ctx))
239 goto err;
240 } else {
241 if (!BN_mul(rr, a, b, ctx))
242 goto err;
243 }
244 if (!BN_nnmod(r, rr, m, ctx))
245 goto err;
246
247 ret = 1;
248
249 err:
250 BN_CTX_end(ctx);
251
252 return ret;
253}
254LCRYPTO_ALIAS(BN_mod_mul);
255
256int
257BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
258{
259 return BN_mod_mul(r, a, a, m, ctx);
260}
261LCRYPTO_ALIAS(BN_mod_sqr);
262
263int
264BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
265{
266 if (r == m) {
267 BNerror(BN_R_INVALID_ARGUMENT);
268 return 0;
269 }
270 if (!BN_lshift1(r, a))
271 return 0;
272 return BN_nnmod(r, r, m, ctx);
273}
274LCRYPTO_ALIAS(BN_mod_lshift1);
275
276/*
277 * BN_mod_lshift1() variant that may be used if a is non-negative
278 * and has already been reduced (less than m).
279 */
280int
281BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m)
282{
283 if (r == m) {
284 BNerror(BN_R_INVALID_ARGUMENT);
285 return 0;
286 }
287 if (!BN_lshift1(r, a))
288 return 0;
289 if (BN_ucmp(r, m) >= 0)
290 return BN_usub(r, r, m);
291 return 1;
292}
293LCRYPTO_ALIAS(BN_mod_lshift1_quick);
294
295int
296BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx)
297{
298 BIGNUM *abs_m;
299 int ret = 0;
300
301 BN_CTX_start(ctx);
302
303 if (r == m) {
304 BNerror(BN_R_INVALID_ARGUMENT);
305 goto err;
306 }
307
308 if (!BN_nnmod(r, a, m, ctx))
309 goto err;
310
311 if (BN_is_negative(m)) {
312 if ((abs_m = BN_CTX_get(ctx)) == NULL)
313 goto err;
314 if (!bn_copy(abs_m, m))
315 goto err;
316 BN_set_negative(abs_m, 0);
317 m = abs_m;
318 }
319 if (!BN_mod_lshift_quick(r, r, n, m))
320 goto err;
321
322 ret = 1;
323 err:
324 BN_CTX_end(ctx);
325
326 return ret;
327}
328LCRYPTO_ALIAS(BN_mod_lshift);
329
330/*
331 * BN_mod_lshift() variant that may be used if a is non-negative
332 * and has already been reduced (less than m).
333 */
334int
335BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m)
336{
337 int max_shift;
338
339 if (r == m) {
340 BNerror(BN_R_INVALID_ARGUMENT);
341 return 0;
342 }
343
344 if (!bn_copy(r, a))
345 return 0;
346
347 while (n > 0) {
348 if ((max_shift = BN_num_bits(m) - BN_num_bits(r)) < 0) {
349 BNerror(BN_R_INPUT_NOT_REDUCED);
350 return 0;
351 }
352 if (max_shift == 0)
353 max_shift = 1;
354 if (max_shift > n)
355 max_shift = n;
356
357 if (!BN_lshift(r, r, max_shift))
358 return 0;
359 n -= max_shift;
360
361 if (BN_ucmp(r, m) >= 0) {
362 if (!BN_usub(r, r, m))
363 return 0;
364 }
365 }
366
367 return 1;
368}
369LCRYPTO_ALIAS(BN_mod_lshift_quick);
diff --git a/src/lib/libcrypto/bn/bn_mod_sqrt.c b/src/lib/libcrypto/bn/bn_mod_sqrt.c
deleted file mode 100644
index 280002cc48..0000000000
--- a/src/lib/libcrypto/bn/bn_mod_sqrt.c
+++ /dev/null
@@ -1,723 +0,0 @@
1/* $OpenBSD: bn_mod_sqrt.c,v 1.3 2023/08/03 18:53:55 tb Exp $ */
2
3/*
4 * Copyright (c) 2022 Theo Buehler <tb@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <openssl/err.h>
20
21#include "bn_local.h"
22
23/*
24 * Tonelli-Shanks according to H. Cohen "A Course in Computational Algebraic
25 * Number Theory", Section 1.5.1, Springer GTM volume 138, Berlin, 1996.
26 *
27 * Under the assumption that p is prime and a is a quadratic residue, we know:
28 *
29 * a^[(p-1)/2] = 1 (mod p). (*)
30 *
31 * To find a square root of a (mod p), we handle three cases of increasing
32 * complexity. In the first two cases, we can compute a square root using an
33 * explicit formula, thus avoiding the probabilistic nature of Tonelli-Shanks.
34 *
35 * 1. p = 3 (mod 4).
36 *
37 * Set n = (p+1)/4. Then 2n = 1 + (p-1)/2 and (*) shows that x = a^n (mod p)
38 * is a square root of a: x^2 = a^(2n) = a * a^[(p-1)/2] = a (mod p).
39 *
40 * 2. p = 5 (mod 8).
41 *
42 * This uses a simplification due to Atkin. By Theorem 1.4.7 and 1.4.9, the
43 * Kronecker symbol (2/p) evaluates to (-1)^[(p^2-1)/8]. From p = 5 (mod 8)
44 * we get (p^2-1)/8 = 1 (mod 2), so (2/p) = -1, and thus
45 *
46 * 2^[(p-1)/2] = -1 (mod p). (**)
47 *
48 * Set b = (2a)^[(p-5)/8]. With (p-1)/2 = 2 + (p-5)/2, (*) and (**) show
49 *
50 * i = 2 a b^2 is a square root of -1 (mod p).
51 *
52 * Indeed, i^2 = 2^2 a^2 b^4 = 2^[(p-1)/2] a^[(p-1)/2] = -1 (mod p). Because
53 * of (i-1)^2 = -2i (mod p) and i (-i) = 1 (mod p), a square root of a is
54 *
55 * x = a b (i-1)
56 *
57 * as x^2 = a^2 b^2 (-2i) = a (2 a b^2) (-i) = a (mod p).
58 *
59 * 3. p = 1 (mod 8).
60 *
61 * This is the Tonelli-Shanks algorithm. For a prime p, the multiplicative
62 * group of GF(p) is cyclic of order p - 1 = 2^s q, with odd q. Denote its
63 * 2-Sylow subgroup by S. It is cyclic of order 2^s. The squares in S have
64 * order dividing 2^(s-1). They are the even powers of any generator z of S.
65 * If a is a quadratic residue, 1 = a^[(p-1)/2] = (a^q)^[2^(s-1)], so b = a^q
66 * is a square in S. Therefore there is an integer k such that b z^(2k) = 1.
67 * Set x = a^[(q+1)/2] z^k, and find x^2 = a (mod p).
68 *
69 * The problem is thus reduced to finding a generator z of the 2-Sylow
70 * subgroup S of GF(p)* and finding k. An iterative construction avoids
71 * the need for an explicit k, a generator is found by a randomized search.
72 *
73 * While we do not actually know that p is a prime number, we can still apply
74 * the formulas in cases 1 and 2 and verify that we have indeed found a square
75 * root of a. Similarly, in case 3, we can try to find a quadratic non-residue,
76 * which will fail for example if p is a square. The iterative construction
77 * may or may not find a candidate square root which we can then validate.
78 */
79
80/*
81 * Handle the cases where p is 2, p isn't odd or p is one. Since BN_mod_sqrt()
82 * can run on untrusted data, a primality check is too expensive. Also treat
83 * the obvious cases where a is 0 or 1.
84 */
85
86static int
87bn_mod_sqrt_trivial_cases(int *done, BIGNUM *out_sqrt, const BIGNUM *a,
88 const BIGNUM *p, BN_CTX *ctx)
89{
90 *done = 1;
91
92 if (BN_abs_is_word(p, 2))
93 return BN_set_word(out_sqrt, BN_is_odd(a));
94
95 if (!BN_is_odd(p) || BN_abs_is_word(p, 1)) {
96 BNerror(BN_R_P_IS_NOT_PRIME);
97 return 0;
98 }
99
100 if (BN_is_zero(a) || BN_is_one(a))
101 return BN_set_word(out_sqrt, BN_is_one(a));
102
103 *done = 0;
104
105 return 1;
106}
107
108/*
109 * Case 1. We know that (a/p) = 1 and that p = 3 (mod 4).
110 */
111
112static int
113bn_mod_sqrt_p_is_3_mod_4(BIGNUM *out_sqrt, const BIGNUM *a, const BIGNUM *p,
114 BN_CTX *ctx)
115{
116 BIGNUM *n;
117 int ret = 0;
118
119 BN_CTX_start(ctx);
120
121 if ((n = BN_CTX_get(ctx)) == NULL)
122 goto err;
123
124 /* Calculate n = (|p| + 1) / 4. */
125 if (!BN_uadd(n, p, BN_value_one()))
126 goto err;
127 if (!BN_rshift(n, n, 2))
128 goto err;
129
130 /* By case 1 above, out_sqrt = a^n is a square root of a (mod p). */
131 if (!BN_mod_exp_ct(out_sqrt, a, n, p, ctx))
132 goto err;
133
134 ret = 1;
135
136 err:
137 BN_CTX_end(ctx);
138
139 return ret;
140}
141
142/*
143 * Case 2. We know that (a/p) = 1 and that p = 5 (mod 8).
144 */
145
146static int
147bn_mod_sqrt_p_is_5_mod_8(BIGNUM *out_sqrt, const BIGNUM *a, const BIGNUM *p,
148 BN_CTX *ctx)
149{
150 BIGNUM *b, *i, *n, *tmp;
151 int ret = 0;
152
153 BN_CTX_start(ctx);
154
155 if ((b = BN_CTX_get(ctx)) == NULL)
156 goto err;
157 if ((i = BN_CTX_get(ctx)) == NULL)
158 goto err;
159 if ((n = BN_CTX_get(ctx)) == NULL)
160 goto err;
161 if ((tmp = BN_CTX_get(ctx)) == NULL)
162 goto err;
163
164 /* Calculate n = (|p| - 5) / 8. Since p = 5 (mod 8), simply shift. */
165 if (!BN_rshift(n, p, 3))
166 goto err;
167 BN_set_negative(n, 0);
168
169 /* Compute tmp = 2a (mod p) for later use. */
170 if (!BN_mod_lshift1(tmp, a, p, ctx))
171 goto err;
172
173 /* Calculate b = (2a)^n (mod p). */
174 if (!BN_mod_exp_ct(b, tmp, n, p, ctx))
175 goto err;
176
177 /* Calculate i = 2 a b^2 (mod p). */
178 if (!BN_mod_sqr(i, b, p, ctx))
179 goto err;
180 if (!BN_mod_mul(i, tmp, i, p, ctx))
181 goto err;
182
183 /* A square root is out_sqrt = a b (i-1) (mod p). */
184 if (!BN_sub_word(i, 1))
185 goto err;
186 if (!BN_mod_mul(out_sqrt, a, b, p, ctx))
187 goto err;
188 if (!BN_mod_mul(out_sqrt, out_sqrt, i, p, ctx))
189 goto err;
190
191 ret = 1;
192
193 err:
194 BN_CTX_end(ctx);
195
196 return ret;
197}
198
199/*
200 * Case 3. We know that (a/p) = 1 and that p = 1 (mod 8).
201 */
202
203/*
204 * Simple helper. To find a generator of the 2-Sylow subgroup of GF(p)*, we
205 * need to find a quadratic non-residue of p, i.e., n such that (n/p) = -1.
206 */
207
208static int
209bn_mod_sqrt_n_is_non_residue(int *is_non_residue, const BIGNUM *n,
210 const BIGNUM *p, BN_CTX *ctx)
211{
212 switch (BN_kronecker(n, p, ctx)) {
213 case -1:
214 *is_non_residue = 1;
215 return 1;
216 case 1:
217 *is_non_residue = 0;
218 return 1;
219 case 0:
220 /* n divides p, so ... */
221 BNerror(BN_R_P_IS_NOT_PRIME);
222 return 0;
223 default:
224 return 0;
225 }
226}
227
228/*
229 * The following is the only non-deterministic part preparing Tonelli-Shanks.
230 *
231 * If we find n such that (n/p) = -1, then n^q (mod p) is a generator of the
232 * 2-Sylow subgroup of GF(p)*. To find such n, first try some small numbers,
233 * then random ones.
234 */
235
236static int
237bn_mod_sqrt_find_sylow_generator(BIGNUM *out_generator, const BIGNUM *p,
238 const BIGNUM *q, BN_CTX *ctx)
239{
240 BIGNUM *n, *p_abs;
241 int i, is_non_residue;
242 int ret = 0;
243
244 BN_CTX_start(ctx);
245
246 if ((n = BN_CTX_get(ctx)) == NULL)
247 goto err;
248 if ((p_abs = BN_CTX_get(ctx)) == NULL)
249 goto err;
250
251 for (i = 2; i < 32; i++) {
252 if (!BN_set_word(n, i))
253 goto err;
254 if (!bn_mod_sqrt_n_is_non_residue(&is_non_residue, n, p, ctx))
255 goto err;
256 if (is_non_residue)
257 goto found;
258 }
259
260 if (!bn_copy(p_abs, p))
261 goto err;
262 BN_set_negative(p_abs, 0);
263
264 for (i = 0; i < 128; i++) {
265 if (!bn_rand_interval(n, 32, p_abs))
266 goto err;
267 if (!bn_mod_sqrt_n_is_non_residue(&is_non_residue, n, p, ctx))
268 goto err;
269 if (is_non_residue)
270 goto found;
271 }
272
273 /*
274 * The probability to get here is < 2^(-128) for prime p. For squares
275 * it is easy: for p = 1369 = 37^2 this happens in ~3% of runs.
276 */
277
278 BNerror(BN_R_TOO_MANY_ITERATIONS);
279 goto err;
280
281 found:
282 /*
283 * If p is prime, n^q generates the 2-Sylow subgroup S of GF(p)*.
284 */
285
286 if (!BN_mod_exp_ct(out_generator, n, q, p, ctx))
287 goto err;
288
289 /* Sanity: p is not necessarily prime, so we could have found 0 or 1. */
290 if (BN_is_zero(out_generator) || BN_is_one(out_generator)) {
291 BNerror(BN_R_P_IS_NOT_PRIME);
292 goto err;
293 }
294
295 ret = 1;
296
297 err:
298 BN_CTX_end(ctx);
299
300 return ret;
301}
302
303/*
304 * Initialization step for Tonelli-Shanks.
305 *
306 * In the end, b = a^q (mod p) and x = a^[(q+1)/2] (mod p). Cohen optimizes this
307 * to minimize taking powers of a. This is a bit confusing and distracting, so
308 * factor this into a separate function.
309 */
310
311static int
312bn_mod_sqrt_tonelli_shanks_initialize(BIGNUM *b, BIGNUM *x, const BIGNUM *a,
313 const BIGNUM *p, const BIGNUM *q, BN_CTX *ctx)
314{
315 BIGNUM *k;
316 int ret = 0;
317
318 BN_CTX_start(ctx);
319
320 if ((k = BN_CTX_get(ctx)) == NULL)
321 goto err;
322
323 /* k = (q-1)/2. Since q is odd, we can shift. */
324 if (!BN_rshift1(k, q))
325 goto err;
326
327 /* x = a^[(q-1)/2] (mod p). */
328 if (!BN_mod_exp_ct(x, a, k, p, ctx))
329 goto err;
330
331 /* b = ax^2 = a^q (mod p). */
332 if (!BN_mod_sqr(b, x, p, ctx))
333 goto err;
334 if (!BN_mod_mul(b, a, b, p, ctx))
335 goto err;
336
337 /* x = ax = a^[(q+1)/2] (mod p). */
338 if (!BN_mod_mul(x, a, x, p, ctx))
339 goto err;
340
341 ret = 1;
342
343 err:
344 BN_CTX_end(ctx);
345
346 return ret;
347}
348
349/*
350 * Find smallest exponent m such that b^(2^m) = 1 (mod p). Assuming that a
351 * is a quadratic residue and p is a prime, we know that 1 <= m < r.
352 */
353
354static int
355bn_mod_sqrt_tonelli_shanks_find_exponent(int *out_exponent, const BIGNUM *b,
356 const BIGNUM *p, int r, BN_CTX *ctx)
357{
358 BIGNUM *x;
359 int m;
360 int ret = 0;
361
362 BN_CTX_start(ctx);
363
364 if ((x = BN_CTX_get(ctx)) == NULL)
365 goto err;
366
367 /*
368 * If r <= 1, the Tonelli-Shanks iteration should have terminated as
369 * r == 1 implies b == 1.
370 */
371 if (r <= 1) {
372 BNerror(BN_R_P_IS_NOT_PRIME);
373 goto err;
374 }
375
376 /*
377 * Sanity check to ensure taking squares actually does something:
378 * If b is 1, the Tonelli-Shanks iteration should have terminated.
379 * If b is 0, something's very wrong, in particular p can't be prime.
380 */
381 if (BN_is_zero(b) || BN_is_one(b)) {
382 BNerror(BN_R_P_IS_NOT_PRIME);
383 goto err;
384 }
385
386 if (!bn_copy(x, b))
387 goto err;
388
389 for (m = 1; m < r; m++) {
390 if (!BN_mod_sqr(x, x, p, ctx))
391 goto err;
392 if (BN_is_one(x))
393 break;
394 }
395
396 if (m >= r) {
397 /* This means a is not a quadratic residue. As (a/p) = 1, ... */
398 BNerror(BN_R_P_IS_NOT_PRIME);
399 goto err;
400 }
401
402 *out_exponent = m;
403
404 ret = 1;
405
406 err:
407 BN_CTX_end(ctx);
408
409 return ret;
410}
411
412/*
413 * The update step. With the minimal m such that b^(2^m) = 1 (mod m),
414 * set t = y^[2^(r-m-1)] (mod p) and update x = xt, y = t^2, b = by.
415 * This preserves the loop invariants a b = x^2, y^[2^(r-1)] = -1 and
416 * b^[2^(r-1)] = 1.
417 */
418
419static int
420bn_mod_sqrt_tonelli_shanks_update(BIGNUM *b, BIGNUM *x, BIGNUM *y,
421 const BIGNUM *p, int m, int r, BN_CTX *ctx)
422{
423 BIGNUM *t;
424 int ret = 0;
425
426 BN_CTX_start(ctx);
427
428 if ((t = BN_CTX_get(ctx)) == NULL)
429 goto err;
430
431 /* t = y^[2^(r-m-1)] (mod p). */
432 if (!BN_set_bit(t, r - m - 1))
433 goto err;
434 if (!BN_mod_exp_ct(t, y, t, p, ctx))
435 goto err;
436
437 /* x = xt (mod p). */
438 if (!BN_mod_mul(x, x, t, p, ctx))
439 goto err;
440
441 /* y = t^2 = y^[2^(r-m)] (mod p). */
442 if (!BN_mod_sqr(y, t, p, ctx))
443 goto err;
444
445 /* b = by (mod p). */
446 if (!BN_mod_mul(b, b, y, p, ctx))
447 goto err;
448
449 ret = 1;
450
451 err:
452 BN_CTX_end(ctx);
453
454 return ret;
455}
456
457static int
458bn_mod_sqrt_p_is_1_mod_8(BIGNUM *out_sqrt, const BIGNUM *a, const BIGNUM *p,
459 BN_CTX *ctx)
460{
461 BIGNUM *b, *q, *x, *y;
462 int e, m, r;
463 int ret = 0;
464
465 BN_CTX_start(ctx);
466
467 if ((b = BN_CTX_get(ctx)) == NULL)
468 goto err;
469 if ((q = BN_CTX_get(ctx)) == NULL)
470 goto err;
471 if ((x = BN_CTX_get(ctx)) == NULL)
472 goto err;
473 if ((y = BN_CTX_get(ctx)) == NULL)
474 goto err;
475
476 /*
477 * Factor p - 1 = 2^e q with odd q. Since p = 1 (mod 8), we know e >= 3.
478 */
479
480 e = 1;
481 while (!BN_is_bit_set(p, e))
482 e++;
483 if (!BN_rshift(q, p, e))
484 goto err;
485
486 if (!bn_mod_sqrt_find_sylow_generator(y, p, q, ctx))
487 goto err;
488
489 /*
490 * Set b = a^q (mod p) and x = a^[(q+1)/2] (mod p).
491 */
492 if (!bn_mod_sqrt_tonelli_shanks_initialize(b, x, a, p, q, ctx))
493 goto err;
494
495 /*
496 * The Tonelli-Shanks iteration. Starting with r = e, the following loop
497 * invariants hold at the start of the loop.
498 *
499 * a b = x^2 (mod p)
500 * y^[2^(r-1)] = -1 (mod p)
501 * b^[2^(r-1)] = 1 (mod p)
502 *
503 * In particular, if b = 1 (mod p), x is a square root of a.
504 *
505 * Since p - 1 = 2^e q, we have 2^(e-1) q = (p - 1) / 2, so in the first
506 * iteration this follows from (a/p) = 1, (n/p) = -1, y = n^q, b = a^q.
507 *
508 * In subsequent iterations, t = y^[2^(r-m-1)], where m is the smallest
509 * m such that b^(2^m) = 1. With x = xt (mod p) and b = bt^2 (mod p) the
510 * first invariant is preserved, the second and third follow from
511 * y = t^2 (mod p) and r = m as well as the choice of m.
512 *
513 * Finally, r is strictly decreasing in each iteration. If p is prime,
514 * let S be the 2-Sylow subgroup of GF(p)*. We can prove the algorithm
515 * stops: Let S_r be the subgroup of S consisting of elements of order
516 * dividing 2^r. Then S_r = <y> and b is in S_(r-1). The S_r form a
517 * descending filtration of S and when r = 1, then b = 1.
518 */
519
520 for (r = e; r >= 1; r = m) {
521 /*
522 * Termination condition. If b == 1 then x is a square root.
523 */
524 if (BN_is_one(b))
525 goto done;
526
527 /* Find smallest exponent 1 <= m < r such that b^(2^m) == 1. */
528 if (!bn_mod_sqrt_tonelli_shanks_find_exponent(&m, b, p, r, ctx))
529 goto err;
530
531 /*
532 * With t = y^[2^(r-m-1)], update x = xt, y = t^2, b = by.
533 */
534 if (!bn_mod_sqrt_tonelli_shanks_update(b, x, y, p, m, r, ctx))
535 goto err;
536
537 /*
538 * Sanity check to make sure we don't loop indefinitely.
539 * bn_mod_sqrt_tonelli_shanks_find_exponent() ensures m < r.
540 */
541 if (r <= m)
542 goto err;
543 }
544
545 /*
546 * If p is prime, we should not get here.
547 */
548
549 BNerror(BN_R_NOT_A_SQUARE);
550 goto err;
551
552 done:
553 if (!bn_copy(out_sqrt, x))
554 goto err;
555
556 ret = 1;
557
558 err:
559 BN_CTX_end(ctx);
560
561 return ret;
562}
563
564/*
565 * Choose the smaller of sqrt and |p| - sqrt.
566 */
567
568static int
569bn_mod_sqrt_normalize(BIGNUM *sqrt, const BIGNUM *p, BN_CTX *ctx)
570{
571 BIGNUM *x;
572 int ret = 0;
573
574 BN_CTX_start(ctx);
575
576 if ((x = BN_CTX_get(ctx)) == NULL)
577 goto err;
578
579 if (!BN_lshift1(x, sqrt))
580 goto err;
581
582 if (BN_ucmp(x, p) > 0) {
583 if (!BN_usub(sqrt, p, sqrt))
584 goto err;
585 }
586
587 ret = 1;
588
589 err:
590 BN_CTX_end(ctx);
591
592 return ret;
593}
594
595/*
596 * Verify that a = (sqrt_a)^2 (mod p). Requires that a is reduced (mod p).
597 */
598
599static int
600bn_mod_sqrt_verify(const BIGNUM *a, const BIGNUM *sqrt_a, const BIGNUM *p,
601 BN_CTX *ctx)
602{
603 BIGNUM *x;
604 int ret = 0;
605
606 BN_CTX_start(ctx);
607
608 if ((x = BN_CTX_get(ctx)) == NULL)
609 goto err;
610
611 if (!BN_mod_sqr(x, sqrt_a, p, ctx))
612 goto err;
613
614 if (BN_cmp(x, a) != 0) {
615 BNerror(BN_R_NOT_A_SQUARE);
616 goto err;
617 }
618
619 ret = 1;
620
621 err:
622 BN_CTX_end(ctx);
623
624 return ret;
625}
626
627static int
628bn_mod_sqrt_internal(BIGNUM *out_sqrt, const BIGNUM *a, const BIGNUM *p,
629 BN_CTX *ctx)
630{
631 BIGNUM *a_mod_p, *sqrt;
632 BN_ULONG lsw;
633 int done;
634 int kronecker;
635 int ret = 0;
636
637 BN_CTX_start(ctx);
638
639 if ((a_mod_p = BN_CTX_get(ctx)) == NULL)
640 goto err;
641 if ((sqrt = BN_CTX_get(ctx)) == NULL)
642 goto err;
643
644 if (!BN_nnmod(a_mod_p, a, p, ctx))
645 goto err;
646
647 if (!bn_mod_sqrt_trivial_cases(&done, sqrt, a_mod_p, p, ctx))
648 goto err;
649 if (done)
650 goto verify;
651
652 /*
653 * Make sure that the Kronecker symbol (a/p) == 1. In case p is prime
654 * this is equivalent to a having a square root (mod p). The cost of
655 * BN_kronecker() is O(log^2(n)). This is small compared to the cost
656 * O(log^4(n)) of Tonelli-Shanks.
657 */
658
659 if ((kronecker = BN_kronecker(a_mod_p, p, ctx)) == -2)
660 goto err;
661 if (kronecker <= 0) {
662 /* This error is only accurate if p is known to be a prime. */
663 BNerror(BN_R_NOT_A_SQUARE);
664 goto err;
665 }
666
667 lsw = BN_lsw(p);
668
669 if (lsw % 4 == 3) {
670 if (!bn_mod_sqrt_p_is_3_mod_4(sqrt, a_mod_p, p, ctx))
671 goto err;
672 } else if (lsw % 8 == 5) {
673 if (!bn_mod_sqrt_p_is_5_mod_8(sqrt, a_mod_p, p, ctx))
674 goto err;
675 } else if (lsw % 8 == 1) {
676 if (!bn_mod_sqrt_p_is_1_mod_8(sqrt, a_mod_p, p, ctx))
677 goto err;
678 } else {
679 /* Impossible to hit since the trivial cases ensure p is odd. */
680 BNerror(BN_R_P_IS_NOT_PRIME);
681 goto err;
682 }
683
684 if (!bn_mod_sqrt_normalize(sqrt, p, ctx))
685 goto err;
686
687 verify:
688 if (!bn_mod_sqrt_verify(a_mod_p, sqrt, p, ctx))
689 goto err;
690
691 if (!bn_copy(out_sqrt, sqrt))
692 goto err;
693
694 ret = 1;
695
696 err:
697 BN_CTX_end(ctx);
698
699 return ret;
700}
701
702BIGNUM *
703BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
704{
705 BIGNUM *out_sqrt;
706
707 if ((out_sqrt = in) == NULL)
708 out_sqrt = BN_new();
709 if (out_sqrt == NULL)
710 goto err;
711
712 if (!bn_mod_sqrt_internal(out_sqrt, a, p, ctx))
713 goto err;
714
715 return out_sqrt;
716
717 err:
718 if (out_sqrt != in)
719 BN_free(out_sqrt);
720
721 return NULL;
722}
723LCRYPTO_ALIAS(BN_mod_sqrt);
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
deleted file mode 100644
index edd7bcd0c8..0000000000
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ /dev/null
@@ -1,621 +0,0 @@
1/* $OpenBSD: bn_mont.c,v 1.66 2025/03/09 15:22:40 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112/*
113 * Details about Montgomery multiplication algorithms can be found at
114 * http://security.ece.orst.edu/publications.html, e.g.
115 * http://security.ece.orst.edu/koc/papers/j37acmon.pdf and
116 * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf
117 */
118
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "bn_internal.h"
#include "bn_local.h"
125
126BN_MONT_CTX *
127BN_MONT_CTX_new(void)
128{
129 BN_MONT_CTX *mctx;
130
131 if ((mctx = calloc(1, sizeof(BN_MONT_CTX))) == NULL)
132 return NULL;
133 mctx->flags = BN_FLG_MALLOCED;
134
135 BN_init(&mctx->RR);
136 BN_init(&mctx->N);
137
138 return mctx;
139}
140LCRYPTO_ALIAS(BN_MONT_CTX_new);
141
142void
143BN_MONT_CTX_free(BN_MONT_CTX *mctx)
144{
145 if (mctx == NULL)
146 return;
147
148 BN_free(&mctx->RR);
149 BN_free(&mctx->N);
150
151 if (mctx->flags & BN_FLG_MALLOCED)
152 free(mctx);
153}
154LCRYPTO_ALIAS(BN_MONT_CTX_free);
155
156BN_MONT_CTX *
157BN_MONT_CTX_create(const BIGNUM *bn, BN_CTX *bn_ctx)
158{
159 BN_MONT_CTX *mctx;
160
161 if ((mctx = BN_MONT_CTX_new()) == NULL)
162 goto err;
163 if (!BN_MONT_CTX_set(mctx, bn, bn_ctx))
164 goto err;
165
166 return mctx;
167
168 err:
169 BN_MONT_CTX_free(mctx);
170
171 return NULL;
172}
173
174BN_MONT_CTX *
175BN_MONT_CTX_copy(BN_MONT_CTX *dst, const BN_MONT_CTX *src)
176{
177 if (dst == src)
178 return dst;
179
180 if (!bn_copy(&dst->RR, &src->RR))
181 return NULL;
182 if (!bn_copy(&dst->N, &src->N))
183 return NULL;
184
185 dst->ri = src->ri;
186 dst->n0[0] = src->n0[0];
187 dst->n0[1] = src->n0[1];
188
189 return dst;
190}
191LCRYPTO_ALIAS(BN_MONT_CTX_copy);
192
193int
194BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
195{
196 BIGNUM *N, *Ninv, *Rinv, *R;
197 int ret = 0;
198
199 BN_CTX_start(ctx);
200
201 if ((N = BN_CTX_get(ctx)) == NULL)
202 goto err;
203 if ((Ninv = BN_CTX_get(ctx)) == NULL)
204 goto err;
205 if ((R = BN_CTX_get(ctx)) == NULL)
206 goto err;
207 if ((Rinv = BN_CTX_get(ctx)) == NULL)
208 goto err;
209
210 /* Save modulus and determine length of R. */
211 if (BN_is_zero(mod))
212 goto err;
213 if (!bn_copy(&mont->N, mod))
214 goto err;
215 mont->N.neg = 0;
216 mont->ri = ((BN_num_bits(mod) + BN_BITS2 - 1) / BN_BITS2) * BN_BITS2;
217 if (mont->ri * 2 < mont->ri)
218 goto err;
219
220 /*
221 * Compute Ninv = (R * Rinv - 1)/N mod R, for R = 2^64. This provides
222 * a single or double word result (dependent on BN word size), that is
223 * later used to implement Montgomery reduction.
224 */
225 BN_zero(R);
226 if (!BN_set_bit(R, 64))
227 goto err;
228
229 /* N = N mod R. */
230 if (!bn_wexpand(N, 2))
231 goto err;
232 if (!BN_set_word(N, mod->d[0]))
233 goto err;
234#if BN_BITS2 == 32
235 if (mod->top > 1) {
236 N->d[1] = mod->d[1];
237 N->top += bn_ct_ne_zero(N->d[1]);
238 }
239#endif
240
241 /* Rinv = R^-1 mod N */
242 if ((BN_mod_inverse_ct(Rinv, R, N, ctx)) == NULL)
243 goto err;
244
245 /* Ninv = (R * Rinv - 1) / N */
246 if (!BN_lshift(Ninv, Rinv, 64))
247 goto err;
248 if (BN_is_zero(Ninv)) {
249 /* R * Rinv == 0, set to R so that R * Rinv - 1 is mod R. */
250 if (!BN_set_bit(Ninv, 64))
251 goto err;
252 }
253 if (!BN_sub_word(Ninv, 1))
254 goto err;
255 if (!BN_div_ct(Ninv, NULL, Ninv, N, ctx))
256 goto err;
257
258 /* Store least significant word(s) of Ninv. */
259 mont->n0[0] = mont->n0[1] = 0;
260 if (Ninv->top > 0)
261 mont->n0[0] = Ninv->d[0];
262#if BN_BITS2 == 32
263 /* Some BN_BITS2 == 32 platforms (namely parisc) use two words of Ninv. */
264 if (Ninv->top > 1)
265 mont->n0[1] = Ninv->d[1];
266#endif
267
268 /* Compute RR = R * R mod N, for use when converting to Montgomery form. */
269 BN_zero(&mont->RR);
270 if (!BN_set_bit(&mont->RR, mont->ri * 2))
271 goto err;
272 if (!BN_mod_ct(&mont->RR, &mont->RR, &mont->N, ctx))
273 goto err;
274
275 ret = 1;
276 err:
277 BN_CTX_end(ctx);
278
279 return ret;
280}
281LCRYPTO_ALIAS(BN_MONT_CTX_set);
282
283BN_MONT_CTX *
284BN_MONT_CTX_set_locked(BN_MONT_CTX **pmctx, int lock, const BIGNUM *mod,
285 BN_CTX *ctx)
286{
287 BN_MONT_CTX *mctx = NULL;
288
289 CRYPTO_r_lock(lock);
290 mctx = *pmctx;
291 CRYPTO_r_unlock(lock);
292
293 if (mctx != NULL)
294 goto done;
295
296 if ((mctx = BN_MONT_CTX_create(mod, ctx)) == NULL)
297 goto err;
298
299 CRYPTO_w_lock(lock);
300 if (*pmctx != NULL) {
301 /* Someone else raced us... */
302 BN_MONT_CTX_free(mctx);
303 mctx = *pmctx;
304 } else {
305 *pmctx = mctx;
306 }
307 CRYPTO_w_unlock(lock);
308
309 goto done;
310 err:
311 BN_MONT_CTX_free(mctx);
312 mctx = NULL;
313 done:
314 return mctx;
315}
316LCRYPTO_ALIAS(BN_MONT_CTX_set_locked);
317
318/*
319 * bn_montgomery_reduce() performs Montgomery reduction, reducing the input
320 * from its Montgomery form aR to a, returning the result in r. Note that the
321 * input is mutated in the process of performing the reduction, destroying its
322 * original value.
323 */
324static int
325bn_montgomery_reduce(BIGNUM *r, BIGNUM *a, BN_MONT_CTX *mctx)
326{
327 BIGNUM *n;
328 BN_ULONG *ap, *rp, n0, v, carry, mask;
329 int i, max, n_len;
330
331 n = &mctx->N;
332 n_len = mctx->N.top;
333
334 if (n_len == 0) {
335 BN_zero(r);
336 return 1;
337 }
338
339 if (!bn_wexpand(r, n_len))
340 return 0;
341
342 /*
343 * Expand a to twice the length of the modulus, zero if necessary.
344 * XXX - make this a requirement of the caller.
345 */
346 if ((max = 2 * n_len) < n_len)
347 return 0;
348 if (!bn_wexpand(a, max))
349 return 0;
350 for (i = a->top; i < max; i++)
351 a->d[i] = 0;
352
353 carry = 0;
354 n0 = mctx->n0[0];
355
356 /* Add multiples of the modulus, so that it becomes divisible by R. */
357 for (i = 0; i < n_len; i++) {
358 v = bn_mul_add_words(&a->d[i], n->d, n_len, a->d[i] * n0);
359 bn_addw_addw(v, a->d[i + n_len], carry, &carry,
360 &a->d[i + n_len]);
361 }
362
363 /* Divide by R (this is the equivalent of right shifting by n_len). */
364 ap = &a->d[n_len];
365
366 /*
367 * The output is now in the range of [0, 2N). Attempt to reduce once by
368 * subtracting the modulus. If the reduction was necessary then the
369 * result is already in r, otherwise copy the value prior to reduction
370 * from the top half of a.
371 */
372 mask = carry - bn_sub_words(r->d, ap, n->d, n_len);
373
374 rp = r->d;
375 for (i = 0; i < n_len; i++) {
376 *rp = (*rp & ~mask) | (*ap & mask);
377 rp++;
378 ap++;
379 }
380 r->top = n_len;
381
382 bn_correct_top(r);
383
384 BN_set_negative(r, a->neg ^ n->neg);
385
386 return 1;
387}
388
389static int
390bn_mod_mul_montgomery_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
391 BN_MONT_CTX *mctx, BN_CTX *ctx)
392{
393 BIGNUM *tmp;
394 int ret = 0;
395
396 BN_CTX_start(ctx);
397
398 if ((tmp = BN_CTX_get(ctx)) == NULL)
399 goto err;
400
401 if (a == b) {
402 if (!BN_sqr(tmp, a, ctx))
403 goto err;
404 } else {
405 if (!BN_mul(tmp, a, b, ctx))
406 goto err;
407 }
408
409 /* Reduce from aRR to aR. */
410 if (!bn_montgomery_reduce(r, tmp, mctx))
411 goto err;
412
413 ret = 1;
414 err:
415 BN_CTX_end(ctx);
416
417 return ret;
418}
419
/*
 * bn_montgomery_multiply_word() updates n_len words of tp in place using two
 * chained multiply-accumulate passes per word: the first combines
 * ap[i] * b with tp[i] and the running *carry_a, the second combines
 * np[i] * w with that intermediate word and the running *carry_n and stores
 * the low word back into tp[i]. Both carries are reset to zero on entry and
 * their final values are left in *carry_a and *carry_n for the caller.
 * Words are handled four at a time while at least four remain, then singly.
 *
 * NOTE(review): the bn_*mulw_* helpers are not defined in this file; the
 * description above assumes each computes (carry:low) = x * y + addend +
 * carry - confirm against bn_internal.h.
 */
420static void
421bn_montgomery_multiply_word(const BN_ULONG *ap, BN_ULONG b, const BN_ULONG *np,
422 BN_ULONG *tp, BN_ULONG w, BN_ULONG *carry_a, BN_ULONG *carry_n, int n_len)
423{
424 BN_ULONG x3, x2, x1, x0;
425
426 *carry_a = *carry_n = 0;
427 /* Four words per iteration while n_len has any bit above the low two set. */
428 while (n_len & ~3) {
429 bn_qwmulw_addqw_addw(ap[3], ap[2], ap[1], ap[0], b,
430 tp[3], tp[2], tp[1], tp[0], *carry_a, carry_a,
431 &x3, &x2, &x1, &x0);
432 bn_qwmulw_addqw_addw(np[3], np[2], np[1], np[0], w,
433 x3, x2, x1, x0, *carry_n, carry_n,
434 &tp[3], &tp[2], &tp[1], &tp[0]);
435 ap += 4;
436 np += 4;
437 tp += 4;
438 n_len -= 4;
439 }
 /* Remaining zero to three words, one at a time. */
440 while (n_len > 0) {
441 bn_mulw_addw_addw(ap[0], b, tp[0], *carry_a, carry_a, &x0);
442 bn_mulw_addw_addw(np[0], w, x0, *carry_n, carry_n, &tp[0]);
443 ap++;
444 np++;
445 tp++;
446 n_len--;
447 }
448}
449
450/*
451 * bn_montgomery_multiply_words() computes r = aR * bR * R^-1 = abR for the
452 * given word arrays. The caller must ensure that rp, ap, bp and np are all
453 * n_len words in length, while tp must be n_len * 2 + 2 words in length.
 *
 * tp is scratch space: its first n_len words are zeroed here and the working
 * window into it advances by one word for every word of bp that is consumed.
 * For each word b of bp, the multiplier w applied to np is derived from the
 * low word of the pending sum as (ap[0] * b + tp[0]) * n0. The result is
 * written to rp; nothing is returned.
454 */
455static void
456bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
457 const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, int n_len)
458{
459 BN_ULONG a0, b, carry_a, carry_n, carry, mask, w;
460 int i;
461
462 carry = 0;
463
464 for (i = 0; i < n_len; i++)
465 tp[i] = 0;
466
467 a0 = ap[0];
468
469 for (i = 0; i < n_len; i++) {
470 b = bp[i];
471
472 /* Compute new t[0] * n0, as we need it for this iteration. */
473 w = (a0 * b + tp[0]) * n0;
474
475 bn_montgomery_multiply_word(ap, b, np, tp, w, &carry_a,
476 &carry_n, n_len);
 /* Fold both word carries and the carry from the previous round into tp[n_len]. */
477 bn_addw_addw(carry_a, carry_n, carry, &carry, &tp[n_len]);
478
 /* Slide the window up one word for the next round. */
479 tp++;
480 }
481 tp[n_len] = carry;
482
483 /*
484 * The output is now in the range of [0, 2N). Attempt to reduce once by
485 * subtracting the modulus. If the reduction was necessary then the
486 * result is already in r, otherwise copy the value prior to reduction
487 * from tp.
488 */
489 mask = bn_ct_ne_zero(tp[n_len]) - bn_sub_words(rp, tp, np, n_len);
490 /* Keep rp where mask bits are clear, take tp where they are set. */
491 for (i = 0; i < n_len; i++) {
492 *rp = (*rp & ~mask) | (*tp & mask);
493 rp++;
494 tp++;
495 }
496}
497
498/*
499 * bn_montgomery_multiply() computes r = aR * bR * R^-1 = abR for the given
500 * BIGNUMs. The caller must ensure that the modulus is two or more words in
501 * length and that a and b have the same number of words as the modulus.
502 */
503static int
504bn_montgomery_multiply(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
505 BN_MONT_CTX *mctx, BN_CTX *ctx)
506{
507 BIGNUM *t;
508 int ret = 0;
509
510 BN_CTX_start(ctx);
511
512 if (mctx->N.top <= 1 || a->top != mctx->N.top || b->top != mctx->N.top)
513 goto err;
514 if (!bn_wexpand(r, mctx->N.top))
515 goto err;
516
517 if ((t = BN_CTX_get(ctx)) == NULL)
518 goto err;
519 if (!bn_wexpand(t, mctx->N.top * 2 + 2))
520 goto err;
521
522 bn_montgomery_multiply_words(r->d, a->d, b->d, mctx->N.d, t->d,
523 mctx->n0[0], mctx->N.top);
524
525 r->top = mctx->N.top;
526 bn_correct_top(r);
527
528 BN_set_negative(r, a->neg ^ b->neg);
529
530 ret = 1;
531 err:
532 BN_CTX_end(ctx);
533
534 return ret;
535}
536
537#ifndef OPENSSL_BN_ASM_MONT
538static int
539bn_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
540 BN_MONT_CTX *mctx, BN_CTX *ctx)
541{
542 if (mctx->N.top <= 1 || a->top != mctx->N.top || b->top != mctx->N.top)
543 return bn_mod_mul_montgomery_simple(r, a, b, mctx, ctx);
544
545 return bn_montgomery_multiply(r, a, b, mctx, ctx);
546}
547#else
548
549static int
550bn_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
551 BN_MONT_CTX *mctx, BN_CTX *ctx)
552{
553 if (mctx->N.top <= 1 || a->top != mctx->N.top || b->top != mctx->N.top)
554 return bn_mod_mul_montgomery_simple(r, a, b, mctx, ctx);
555
556 /*
557 * Legacy bn_mul_mont() performs stack based allocation, without
558 * size limitation. Allowing a large size results in the stack
559 * being blown.
560 */
561 if (mctx->N.top > (8 * 1024 / sizeof(BN_ULONG)))
562 return bn_montgomery_multiply(r, a, b, mctx, ctx);
563
564 if (!bn_wexpand(r, mctx->N.top))
565 return 0;
566
567 /*
568 * Legacy bn_mul_mont() can indicate that we should "fallback" to
569 * another implementation.
570 */
571 if (!bn_mul_mont(r->d, a->d, b->d, mctx->N.d, mctx->n0, mctx->N.top))
572 return bn_montgomery_multiply(r, a, b, mctx, ctx);
573
574 r->top = mctx->N.top;
575 bn_correct_top(r);
576
577 BN_set_negative(r, a->neg ^ b->neg);
578
579 return (1);
580}
581#endif
582
583int
584BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
585 BN_MONT_CTX *mctx, BN_CTX *ctx)
586{
587 /* Compute r = aR * bR * R^-1 mod N = abR mod N */
588 return bn_mod_mul_montgomery(r, a, b, mctx, ctx);
589}
590LCRYPTO_ALIAS(BN_mod_mul_montgomery);
591
592int
593BN_to_montgomery(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mctx, BN_CTX *ctx)
594{
595 /* Compute r = a * R * R * R^-1 mod N = aR mod N */
596 return bn_mod_mul_montgomery(r, a, &mctx->RR, mctx, ctx);
597}
598LCRYPTO_ALIAS(BN_to_montgomery);
599
600int
601BN_from_montgomery(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mctx, BN_CTX *ctx)
602{
603 BIGNUM *tmp;
604 int ret = 0;
605
606 BN_CTX_start(ctx);
607
608 if ((tmp = BN_CTX_get(ctx)) == NULL)
609 goto err;
610 if (!bn_copy(tmp, a))
611 goto err;
612 if (!bn_montgomery_reduce(r, tmp, mctx))
613 goto err;
614
615 ret = 1;
616 err:
617 BN_CTX_end(ctx);
618
619 return ret;
620}
621LCRYPTO_ALIAS(BN_from_montgomery);
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
deleted file mode 100644
index bdeb9b0fe8..0000000000
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ /dev/null
@@ -1,370 +0,0 @@
1/* $OpenBSD: bn_mul.c,v 1.39 2023/07/08 12:21:58 beck Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <assert.h>
60#include <stdio.h>
61#include <string.h>
62
63#include <openssl/opensslconf.h>
64
65#include "bn_arch.h"
66#include "bn_internal.h"
67#include "bn_local.h"
68
69/*
70 * bn_mul_comba4() computes r[] = a[] * b[] using Comba multiplication
71 * (https://everything2.com/title/Comba+multiplication), where a and b are both
72 * four word arrays, producing an eight word array result.
 *
 * The calls are grouped by result column: group k passes every pair
 * a[i], b[j] with i + j == k to bn_mulw_addtw(), and the last call of the
 * group stores into r[k]. The final call writes both r[6] and r[7]. The
 * whole definition is compiled only when HAVE_BN_MUL_COMBA4 is not defined.
 *
 * NOTE(review): bn_mulw_addtw() is not defined in this file; it presumably
 * adds the double word product of its first two arguments to the three word
 * value given by the next three and returns a three word result - confirm
 * against bn_internal.h.
73 */
74#ifndef HAVE_BN_MUL_COMBA4
75void
76bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
77{
78 BN_ULONG c0, c1, c2;
79 /* Column 0. */
80 bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]);
81 /* Column 1. */
82 bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0);
83 bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]);
84 /* Column 2. */
85 bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0);
86 bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0);
87 bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]);
88 /* Column 3. */
89 bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0);
90 bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0);
91 bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0);
92 bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]);
93 /* Column 4. */
94 bn_mulw_addtw(a[3], b[1], 0, c2, c1, &c2, &c1, &c0);
95 bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0);
96 bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &r[4]);
97 /* Column 5. */
98 bn_mulw_addtw(a[2], b[3], 0, c2, c1, &c2, &c1, &c0);
99 bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &r[5]);
100 /* Column 6, which also produces the top word r[7]. */
101 bn_mulw_addtw(a[3], b[3], 0, c2, c1, &c2, &r[7], &r[6]);
102}
103#endif
104
105/*
106 * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication
107 * (https://everything2.com/title/Comba+multiplication), where a and b are both
108 * eight word arrays, producing a 16 word array result.
 *
 * The calls are grouped by result column: group k passes every pair
 * a[i], b[j] with i + j == k to bn_mulw_addtw(), and the last call of the
 * group stores into r[k]. The final call writes both r[14] and r[15]. The
 * whole definition is compiled only when HAVE_BN_MUL_COMBA8 is not defined.
 *
 * NOTE(review): bn_mulw_addtw() is not defined in this file; it presumably
 * adds the double word product of its first two arguments to the three word
 * value given by the next three and returns a three word result - confirm
 * against bn_internal.h.
109 */
110#ifndef HAVE_BN_MUL_COMBA8
111void
112bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
113{
114 BN_ULONG c0, c1, c2;
115 /* Column 0. */
116 bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]);
117 /* Column 1. */
118 bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0);
119 bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]);
120 /* Column 2. */
121 bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0);
122 bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0);
123 bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]);
124 /* Column 3. */
125 bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0);
126 bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0);
127 bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0);
128 bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]);
129 /* Column 4. */
130 bn_mulw_addtw(a[4], b[0], 0, c2, c1, &c2, &c1, &c0);
131 bn_mulw_addtw(a[3], b[1], c2, c1, c0, &c2, &c1, &c0);
132 bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0);
133 bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &c0);
134 bn_mulw_addtw(a[0], b[4], c2, c1, c0, &c2, &c1, &r[4]);
135 /* Column 5. */
136 bn_mulw_addtw(a[0], b[5], 0, c2, c1, &c2, &c1, &c0);
137 bn_mulw_addtw(a[1], b[4], c2, c1, c0, &c2, &c1, &c0);
138 bn_mulw_addtw(a[2], b[3], c2, c1, c0, &c2, &c1, &c0);
139 bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &c0);
140 bn_mulw_addtw(a[4], b[1], c2, c1, c0, &c2, &c1, &c0);
141 bn_mulw_addtw(a[5], b[0], c2, c1, c0, &c2, &c1, &r[5]);
142 /* Column 6. */
143 bn_mulw_addtw(a[6], b[0], 0, c2, c1, &c2, &c1, &c0);
144 bn_mulw_addtw(a[5], b[1], c2, c1, c0, &c2, &c1, &c0);
145 bn_mulw_addtw(a[4], b[2], c2, c1, c0, &c2, &c1, &c0);
146 bn_mulw_addtw(a[3], b[3], c2, c1, c0, &c2, &c1, &c0);
147 bn_mulw_addtw(a[2], b[4], c2, c1, c0, &c2, &c1, &c0);
148 bn_mulw_addtw(a[1], b[5], c2, c1, c0, &c2, &c1, &c0);
149 bn_mulw_addtw(a[0], b[6], c2, c1, c0, &c2, &c1, &r[6]);
150 /* Column 7. */
151 bn_mulw_addtw(a[0], b[7], 0, c2, c1, &c2, &c1, &c0);
152 bn_mulw_addtw(a[1], b[6], c2, c1, c0, &c2, &c1, &c0);
153 bn_mulw_addtw(a[2], b[5], c2, c1, c0, &c2, &c1, &c0);
154 bn_mulw_addtw(a[3], b[4], c2, c1, c0, &c2, &c1, &c0);
155 bn_mulw_addtw(a[4], b[3], c2, c1, c0, &c2, &c1, &c0);
156 bn_mulw_addtw(a[5], b[2], c2, c1, c0, &c2, &c1, &c0);
157 bn_mulw_addtw(a[6], b[1], c2, c1, c0, &c2, &c1, &c0);
158 bn_mulw_addtw(a[7], b[0], c2, c1, c0, &c2, &c1, &r[7]);
159 /* Column 8. */
160 bn_mulw_addtw(a[7], b[1], 0, c2, c1, &c2, &c1, &c0);
161 bn_mulw_addtw(a[6], b[2], c2, c1, c0, &c2, &c1, &c0);
162 bn_mulw_addtw(a[5], b[3], c2, c1, c0, &c2, &c1, &c0);
163 bn_mulw_addtw(a[4], b[4], c2, c1, c0, &c2, &c1, &c0);
164 bn_mulw_addtw(a[3], b[5], c2, c1, c0, &c2, &c1, &c0);
165 bn_mulw_addtw(a[2], b[6], c2, c1, c0, &c2, &c1, &c0);
166 bn_mulw_addtw(a[1], b[7], c2, c1, c0, &c2, &c1, &r[8]);
167 /* Column 9. */
168 bn_mulw_addtw(a[2], b[7], 0, c2, c1, &c2, &c1, &c0);
169 bn_mulw_addtw(a[3], b[6], c2, c1, c0, &c2, &c1, &c0);
170 bn_mulw_addtw(a[4], b[5], c2, c1, c0, &c2, &c1, &c0);
171 bn_mulw_addtw(a[5], b[4], c2, c1, c0, &c2, &c1, &c0);
172 bn_mulw_addtw(a[6], b[3], c2, c1, c0, &c2, &c1, &c0);
173 bn_mulw_addtw(a[7], b[2], c2, c1, c0, &c2, &c1, &r[9]);
174 /* Column 10. */
175 bn_mulw_addtw(a[7], b[3], 0, c2, c1, &c2, &c1, &c0);
176 bn_mulw_addtw(a[6], b[4], c2, c1, c0, &c2, &c1, &c0);
177 bn_mulw_addtw(a[5], b[5], c2, c1, c0, &c2, &c1, &c0);
178 bn_mulw_addtw(a[4], b[6], c2, c1, c0, &c2, &c1, &c0);
179 bn_mulw_addtw(a[3], b[7], c2, c1, c0, &c2, &c1, &r[10]);
180 /* Column 11. */
181 bn_mulw_addtw(a[4], b[7], 0, c2, c1, &c2, &c1, &c0);
182 bn_mulw_addtw(a[5], b[6], c2, c1, c0, &c2, &c1, &c0);
183 bn_mulw_addtw(a[6], b[5], c2, c1, c0, &c2, &c1, &c0);
184 bn_mulw_addtw(a[7], b[4], c2, c1, c0, &c2, &c1, &r[11]);
185 /* Column 12. */
186 bn_mulw_addtw(a[7], b[5], 0, c2, c1, &c2, &c1, &c0);
187 bn_mulw_addtw(a[6], b[6], c2, c1, c0, &c2, &c1, &c0);
188 bn_mulw_addtw(a[5], b[7], c2, c1, c0, &c2, &c1, &r[12]);
189 /* Column 13. */
190 bn_mulw_addtw(a[6], b[7], 0, c2, c1, &c2, &c1, &c0);
191 bn_mulw_addtw(a[7], b[6], c2, c1, c0, &c2, &c1, &r[13]);
192 /* Column 14, which also produces the top word r[15]. */
193 bn_mulw_addtw(a[7], b[7], 0, c2, c1, &c2, &r[15], &r[14]);
194}
195#endif
196
197/*
198 * bn_mul_words() computes (carry:r[i]) = a[i] * w + carry, where a is an array
199 * of words and w is a single word. This should really be called bn_mulw_words()
200 * since only one input is an array. This is used as a step in the multiplication
201 * of word arrays.
202 */
203#ifndef HAVE_BN_MUL_WORDS
204BN_ULONG
205bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
206{
207 BN_ULONG carry = 0;
208
209 assert(num >= 0);
210 if (num <= 0)
211 return 0;
212
213 while (num & ~3) {
214 bn_qwmulw_addw(a[3], a[2], a[1], a[0], w, carry, &carry,
215 &r[3], &r[2], &r[1], &r[0]);
216 a += 4;
217 r += 4;
218 num -= 4;
219 }
220 while (num) {
221 bn_mulw_addw(a[0], w, carry, &carry, &r[0]);
222 a++;
223 r++;
224 num--;
225 }
226 return carry;
227}
228#endif
229
230/*
231 * bn_mul_add_words() computes (carry:r[i]) = a[i] * w + r[i] + carry, where
232 * a is an array of words and w is a single word. This should really be called
233 * bn_mulw_add_words() since only one input is an array. This is used as a step
234 * in the multiplication of word arrays.
235 */
236#ifndef HAVE_BN_MUL_ADD_WORDS
237BN_ULONG
238bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
239{
240 BN_ULONG carry = 0;
241
242 assert(num >= 0);
243 if (num <= 0)
244 return 0;
245
246 while (num & ~3) {
247 bn_qwmulw_addqw_addw(a[3], a[2], a[1], a[0], w,
248 r[3], r[2], r[1], r[0], carry, &carry,
249 &r[3], &r[2], &r[1], &r[0]);
250 a += 4;
251 r += 4;
252 num -= 4;
253 }
254 while (num) {
255 bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]);
256 a++;
257 r++;
258 num--;
259 }
260
261 return carry;
262}
263#endif
264
265void
266bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
267{
268 BN_ULONG *rr;
269
270
271 if (na < nb) {
272 int itmp;
273 BN_ULONG *ltmp;
274
275 itmp = na;
276 na = nb;
277 nb = itmp;
278 ltmp = a;
279 a = b;
280 b = ltmp;
281
282 }
283 rr = &(r[na]);
284 if (nb <= 0) {
285 (void)bn_mul_words(r, a, na, 0);
286 return;
287 } else
288 rr[0] = bn_mul_words(r, a, na, b[0]);
289
290 for (;;) {
291 if (--nb <= 0)
292 return;
293 rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
294 if (--nb <= 0)
295 return;
296 rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
297 if (--nb <= 0)
298 return;
299 rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
300 if (--nb <= 0)
301 return;
302 rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
303 rr += 4;
304 r += 4;
305 b += 4;
306 }
307}
308
309
310#ifndef HAVE_BN_MUL
311int
312bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx)
313{
314 bn_mul_normal(r->d, a->d, a->top, b->d, b->top);
315
316 return 1;
317}
318
319#endif /* HAVE_BN_MUL */
320
321int
322BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
323{
324 BIGNUM *rr;
325 int rn;
326 int ret = 0;
327
328 BN_CTX_start(ctx);
329
330 if (BN_is_zero(a) || BN_is_zero(b)) {
331 BN_zero(r);
332 goto done;
333 }
334
335 rr = r;
336 if (rr == a || rr == b)
337 rr = BN_CTX_get(ctx);
338 if (rr == NULL)
339 goto err;
340
341 rn = a->top + b->top;
342 if (rn < a->top)
343 goto err;
344 if (!bn_wexpand(rr, rn))
345 goto err;
346
347 if (a->top == 4 && b->top == 4) {
348 bn_mul_comba4(rr->d, a->d, b->d);
349 } else if (a->top == 8 && b->top == 8) {
350 bn_mul_comba8(rr->d, a->d, b->d);
351 } else {
352 if (!bn_mul(rr, a, b, rn, ctx))
353 goto err;
354 }
355
356 rr->top = rn;
357 bn_correct_top(rr);
358
359 BN_set_negative(rr, a->neg ^ b->neg);
360
361 if (!bn_copy(r, rr))
362 goto err;
363 done:
364 ret = 1;
365 err:
366 BN_CTX_end(ctx);
367
368 return ret;
369}
370LCRYPTO_ALIAS(BN_mul);
diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c
deleted file mode 100644
index 5a4aa50bf1..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.c
+++ /dev/null
@@ -1,423 +0,0 @@
1/* $OpenBSD: bn_prime.c,v 1.34 2023/07/20 06:26:27 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include <time.h>
114
115#include <openssl/err.h>
116
117#include "bn_local.h"
118
119/* The quick sieve algorithm approach to weeding out primes is
120 * Philip Zimmermann's, as implemented in PGP. I have had a read of
121 * his comments and implemented my own version.
122 */
123#include "bn_prime.h"
124
125static int probable_prime(BIGNUM *rnd, int bits);
126static int probable_prime_dh(BIGNUM *rnd, int bits,
127 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
128static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
129 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
130
131int
132BN_GENCB_call(BN_GENCB *cb, int a, int b)
133{
134 /* No callback means continue */
135 if (!cb)
136 return 1;
137 switch (cb->ver) {
138 case 1:
139 /* Deprecated-style callbacks */
140 if (!cb->cb.cb_1)
141 return 1;
142 cb->cb.cb_1(a, b, cb->arg);
143 return 1;
144 case 2:
145 /* New-style callbacks */
146 return cb->cb.cb_2(a, b, cb);
147 default:
148 break;
149 }
150 /* Unrecognised callback type */
151 return 0;
152}
153LCRYPTO_ALIAS(BN_GENCB_call);
154
155int
156BN_generate_prime_ex(BIGNUM *ret, int bits, int safe, const BIGNUM *add,
157 const BIGNUM *rem, BN_GENCB *cb)
158{
159 BN_CTX *ctx;
160 BIGNUM *p;
161 int is_prime;
162 int loops = 0;
163 int found = 0;
164
165 if (bits < 2 || (bits == 2 && safe)) {
166 /*
167 * There are no prime numbers smaller than 2, and the smallest
168 * safe prime (7) spans three bits.
169 */
170 BNerror(BN_R_BITS_TOO_SMALL);
171 return 0;
172 }
173
174 if ((ctx = BN_CTX_new()) == NULL)
175 goto err;
176 BN_CTX_start(ctx);
177 if ((p = BN_CTX_get(ctx)) == NULL)
178 goto err;
179
180 loop:
181 /* Make a random number and set the top and bottom bits. */
182 if (add == NULL) {
183 if (!probable_prime(ret, bits))
184 goto err;
185 } else {
186 if (safe) {
187 if (!probable_prime_dh_safe(ret, bits, add, rem, ctx))
188 goto err;
189 } else {
190 if (!probable_prime_dh(ret, bits, add, rem, ctx))
191 goto err;
192 }
193 }
194
195 if (!BN_GENCB_call(cb, 0, loops++))
196 goto err;
197
198 if (!safe) {
199 if (!bn_is_prime_bpsw(&is_prime, ret, ctx, 1))
200 goto err;
201 if (!is_prime)
202 goto loop;
203 } else {
204 if (!bn_is_prime_bpsw(&is_prime, ret, ctx, 1))
205 goto err;
206 if (!is_prime)
207 goto loop;
208
209 /*
210 * For safe prime generation, check that p = (ret-1)/2 is prime.
211 * Since this prime has >= 3 bits, it is odd, and we can simply
212 * divide by 2.
213 */
214 if (!BN_rshift1(p, ret))
215 goto err;
216
217 if (!bn_is_prime_bpsw(&is_prime, p, ctx, 1))
218 goto err;
219 if (!is_prime)
220 goto loop;
221
222 if (!BN_GENCB_call(cb, 2, loops - 1))
223 goto err;
224 }
225
226 found = 1;
227
228 err:
229 BN_CTX_end(ctx);
230 BN_CTX_free(ctx);
231
232 return found;
233}
234LCRYPTO_ALIAS(BN_generate_prime_ex);
235
236int
237BN_is_prime_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, BN_GENCB *cb)
238{
239 return BN_is_prime_fasttest_ex(a, checks, ctx_passed, 0, cb);
240}
241LCRYPTO_ALIAS(BN_is_prime_ex);
242
243#define BN_PRIME_MAXIMUM_BITS (32 * 1024)
244
245int
246BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
247 int do_trial_division, BN_GENCB *cb)
248{
249 int is_prime;
250
251 if (checks < 0)
252 return -1;
253
254 /*
255 * Prime numbers this large do not appear in everyday cryptography
256 * and checking such numbers for primality is very expensive.
257 */
258 if (BN_num_bits(a) > BN_PRIME_MAXIMUM_BITS) {
259 BNerror(BN_R_BIGNUM_TOO_LONG);
260 return -1;
261 }
262
263 if (checks == BN_prime_checks)
264 checks = BN_prime_checks_for_size(BN_num_bits(a));
265
266 /* XXX - tickle BN_GENCB in bn_is_prime_bpsw(). */
267 if (!bn_is_prime_bpsw(&is_prime, a, ctx_passed, checks))
268 return -1;
269
270 return is_prime;
271}
272LCRYPTO_ALIAS(BN_is_prime_fasttest_ex);
273
274static int
275probable_prime(BIGNUM *rnd, int bits)
276{
277 int i;
278 BN_ULONG mods[NUMPRIMES];
279 BN_ULONG delta, maxdelta;
280
281again:
282 if (!BN_rand(rnd, bits, 1, 1))
283 return (0);
284 /* we now have a random number 'rand' to test. */
285 for (i = 1; i < NUMPRIMES; i++) {
286 BN_ULONG mod = BN_mod_word(rnd, primes[i]);
287 if (mod == (BN_ULONG)-1)
288 return (0);
289 mods[i] = mod;
290 }
291 maxdelta = BN_MASK2 - primes[NUMPRIMES - 1];
292 delta = 0;
293loop:
294 for (i = 1; i < NUMPRIMES; i++) {
295 /* check that rnd is not a prime and also
296 * that gcd(rnd-1,primes) == 1 (except for 2) */
297 if (((mods[i] + delta) % primes[i]) <= 1) {
298 delta += 2;
299 if (delta > maxdelta)
300 goto again;
301 goto loop;
302 }
303 }
304 if (!BN_add_word(rnd, delta))
305 return (0);
306 return (1);
307}
308
309static int
310probable_prime_dh(BIGNUM *rnd, int bits, const BIGNUM *add, const BIGNUM *rem,
311 BN_CTX *ctx)
312{
313 int i, ret = 0;
314 BIGNUM *t1;
315
316 BN_CTX_start(ctx);
317 if ((t1 = BN_CTX_get(ctx)) == NULL)
318 goto err;
319
320 if (!BN_rand(rnd, bits, 0, 1))
321 goto err;
322
323 /* we need ((rnd-rem) % add) == 0 */
324
325 if (!BN_mod_ct(t1, rnd, add, ctx))
326 goto err;
327 if (!BN_sub(rnd, rnd, t1))
328 goto err;
329 if (rem == NULL) {
330 if (!BN_add_word(rnd, 1))
331 goto err;
332 } else {
333 if (!BN_add(rnd, rnd, rem))
334 goto err;
335 }
336
337 /* we now have a random number 'rand' to test. */
338
339loop:
340 for (i = 1; i < NUMPRIMES; i++) {
341 /* check that rnd is a prime */
342 BN_LONG mod = BN_mod_word(rnd, primes[i]);
343 if (mod == (BN_ULONG)-1)
344 goto err;
345 if (mod <= 1) {
346 if (!BN_add(rnd, rnd, add))
347 goto err;
348 goto loop;
349 }
350 }
351 ret = 1;
352
353err:
354 BN_CTX_end(ctx);
355 return (ret);
356}
357
358static int
359probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
360 const BIGNUM *rem, BN_CTX *ctx)
361{
362 int i, ret = 0;
363 BIGNUM *t1, *qadd, *q;
364
365 bits--;
366 BN_CTX_start(ctx);
367 if ((t1 = BN_CTX_get(ctx)) == NULL)
368 goto err;
369 if ((q = BN_CTX_get(ctx)) == NULL)
370 goto err;
371 if ((qadd = BN_CTX_get(ctx)) == NULL)
372 goto err;
373
374 if (!BN_rshift1(qadd, padd))
375 goto err;
376
377 if (!BN_rand(q, bits, 0, 1))
378 goto err;
379
380 /* we need ((rnd-rem) % add) == 0 */
381 if (!BN_mod_ct(t1, q,qadd, ctx))
382 goto err;
383 if (!BN_sub(q, q, t1))
384 goto err;
385 if (rem == NULL) {
386 if (!BN_add_word(q, 1))
387 goto err;
388 } else {
389 if (!BN_rshift1(t1, rem))
390 goto err;
391 if (!BN_add(q, q, t1))
392 goto err;
393 }
394
395 /* we now have a random number 'rand' to test. */
396 if (!BN_lshift1(p, q))
397 goto err;
398 if (!BN_add_word(p, 1))
399 goto err;
400
401loop:
402 for (i = 1; i < NUMPRIMES; i++) {
403 /* check that p and q are prime */
404 /* check that for p and q
405 * gcd(p-1,primes) == 1 (except for 2) */
406 BN_ULONG pmod = BN_mod_word(p, primes[i]);
407 BN_ULONG qmod = BN_mod_word(q, primes[i]);
408 if (pmod == (BN_ULONG)-1 || qmod == (BN_ULONG)-1)
409 goto err;
410 if (pmod == 0 || qmod == 0) {
411 if (!BN_add(p, p, padd))
412 goto err;
413 if (!BN_add(q, q, qadd))
414 goto err;
415 goto loop;
416 }
417 }
418 ret = 1;
419
420err:
421 BN_CTX_end(ctx);
422 return (ret);
423}
diff --git a/src/lib/libcrypto/bn/bn_prime.h b/src/lib/libcrypto/bn/bn_prime.h
deleted file mode 100644
index 4ea2d47948..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.h
+++ /dev/null
@@ -1,14 +0,0 @@
1/* $OpenBSD: bn_prime.h,v 1.9 2022/11/10 10:24:50 tb Exp $ */
2/*
3 * Public domain.
4 */
5
6#include <stdint.h>
7
8__BEGIN_HIDDEN_DECLS
9
10#define NUMPRIMES 2048
11
12extern const uint16_t primes[NUMPRIMES];
13
14__END_HIDDEN_DECLS
diff --git a/src/lib/libcrypto/bn/bn_prime.pl b/src/lib/libcrypto/bn/bn_prime.pl
deleted file mode 100644
index f638e4a9a4..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.pl
+++ /dev/null
@@ -1,100 +0,0 @@
1#!/usr/bin/perl
2# $OpenBSD: bn_prime.pl,v 1.12 2023/03/26 08:04:57 tb Exp $
3#
4# Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
5# All rights reserved.
6#
7# This package is an SSL implementation written
8# by Eric Young (eay@cryptsoft.com).
9# The implementation was written so as to conform with Netscapes SSL.
10#
11# This library is free for commercial and non-commercial use as long as
12# the following conditions are aheared to. The following conditions
13# apply to all code found in this distribution, be it the RC4, RSA,
14# lhash, DES, etc., code; not just the SSL code. The SSL documentation
15# included with this distribution is covered by the same copyright terms
16# except that the holder is Tim Hudson (tjh@cryptsoft.com).
17#
18# Copyright remains Eric Young's, and as such any Copyright notices in
19# the code are not to be removed.
20# If this package is used in a product, Eric Young should be given attribution
21# as the author of the parts of the library used.
22# This can be in the form of a textual message at program startup or
23# in documentation (online or textual) provided with the package.
24#
25# Redistribution and use in source and binary forms, with or without
26# modification, are permitted provided that the following conditions
27# are met:
28# 1. Redistributions of source code must retain the copyright
29# notice, this list of conditions and the following disclaimer.
30# 2. Redistributions in binary form must reproduce the above copyright
31# notice, this list of conditions and the following disclaimer in the
32# documentation and/or other materials provided with the distribution.
33# 3. All advertising materials mentioning features or use of this software
34# must display the following acknowledgement:
35# "This product includes cryptographic software written by
36# Eric Young (eay@cryptsoft.com)"
37# The word 'cryptographic' can be left out if the rouines from the library
38# being used are not cryptographic related :-).
39# 4. If you include any Windows specific code (or a derivative thereof) from
40# the apps directory (application code) you must include an acknowledgement:
41# "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
42#
43# THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
44# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53# SUCH DAMAGE.
54#
55# The licence and distribution terms for any publically available version or
56# derivative of this code cannot be changed. i.e. this code cannot simply be
57# copied and put under another distribution licence
58# [including the GNU Public Licence.]
59
use strict;
use warnings;

my ($i, $num, $p, $s, @primes);

# Number of primes to emit; may be overridden by the first argument.
$num = 2048;
$num = $ARGV[0] if $#ARGV >= 0;

# The 6543rd prime is 2^16 + 1, which does not fit into a uint16_t.
die "$num must be smaller than 6543" if $num >= 6543;

push(@primes, 2);
$p = 1;

# Trial division of odd candidates by the primes found so far, up to sqrt($p).
loop:
while ($#primes < $num - 1) {
	$p += 2;
	$s = int(sqrt($p));

	for ($i = 0; defined($primes[$i]) && $primes[$i] <= $s; $i++) {
		next loop if $p % $primes[$i] == 0;
	}

	# $p is the value stored in the uint16_t table, so it is the one
	# that has to be range checked (not the sieve prime at index $i).
	die "\$p is too large: $p" if $p > 65535;
	push(@primes, $p);
}

printf("/*\t\$" . "OpenBSD" . "\$ */\n");
print <<\EOF;
/*
 * Public domain, generated by bn_prime.pl.
 */

EOF

print "#include \"bn_prime.h\"\n\n";
print "const uint16_t primes[NUMPRIMES] = {";
for ($i = 0; $i <= $#primes; $i++) {
	printf("%s%5d,", $i % 8 == 0 ? "\n\t" : " ", $primes[$i]);
}
print "\n};\n";
diff --git a/src/lib/libcrypto/bn/bn_primitives.c b/src/lib/libcrypto/bn/bn_primitives.c
deleted file mode 100644
index 66427a9046..0000000000
--- a/src/lib/libcrypto/bn/bn_primitives.c
+++ /dev/null
@@ -1,65 +0,0 @@
1/* $OpenBSD: bn_primitives.c,v 1.2 2023/06/21 07:48:41 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#include "bn_arch.h"
21#include "bn_internal.h"
22#include "bn_local.h"
23
24#ifndef HAVE_BN_CLZW
25#ifndef HAVE_BN_WORD_CLZ
26int
27bn_word_clz(BN_ULONG w)
28{
29 BN_ULONG bits, mask, shift;
30
31 bits = shift = BN_BITS2;
32 mask = 0;
33
34 while ((shift >>= 1) != 0) {
35 bits += (shift & mask) - (shift & ~mask);
36 mask = bn_ct_ne_zero_mask(w >> bits);
37 }
38 bits += 1 & mask;
39
40 bits -= bn_ct_eq_zero(w);
41
42 return BN_BITS2 - bits;
43}
44#endif
45#endif
46
47#ifndef HAVE_BN_BITSIZE
48int
49bn_bitsize(const BIGNUM *bn)
50{
51 BN_ULONG n = 0, x = 0;
52 BN_ULONG mask, w;
53 int i = 0;
54
55 while (i < bn->top) {
56 w = bn->d[i];
57 mask = bn_ct_ne_zero_mask(w);
58 n = ((BN_ULONG)i & mask) | (n & ~mask);
59 x = (w & mask) | (x & ~mask);
60 i++;
61 }
62
63 return (n + 1) * BN_BITS2 - bn_clzw(x);
64}
65#endif
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c
deleted file mode 100644
index cd8b663602..0000000000
--- a/src/lib/libcrypto/bn/bn_print.c
+++ /dev/null
@@ -1,191 +0,0 @@
1/* $OpenBSD: bn_print.c,v 1.47 2024/03/02 09:18:28 tb Exp $ */
2
3/*
4 * Copyright (c) 2023 Theo Buehler <tb@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <ctype.h>
20#include <stdarg.h>
21#include <stdio.h>
22#include <stdint.h>
23#include <stdlib.h>
24
25#include <openssl/bio.h>
26#include <openssl/bn.h>
27
28#include "bio_local.h"
29#include "bn_local.h"
30#include "bytestring.h"
31
32static int
33bn_print_zero(BIO *bio, const BIGNUM *bn)
34{
35 if (!BN_is_zero(bn))
36 return 0;
37 if (BIO_printf(bio, " 0\n") <= 0)
38 return 0;
39 return 1;
40}
41
42static int
43bn_print_word(BIO *bio, const BIGNUM *bn)
44{
45 unsigned long long word;
46 const char *neg = "";
47
48 if (BN_is_zero(bn) || BN_num_bytes(bn) > BN_BYTES)
49 return 0;
50
51 if (BN_is_negative(bn))
52 neg = "-";
53
54 word = BN_get_word(bn);
55 if (BIO_printf(bio, " %s%llu (%s0x%llx)\n", neg, word, neg, word) <= 0)
56 return 0;
57
58 return 1;
59}
60
61static int
62bn_print_bignum(BIO *bio, const BIGNUM *bn, int indent)
63{
64 CBS cbs;
65 char *hex = NULL;
66 size_t hex_len = 0;
67 size_t octets = 0;
68 uint8_t hi, lo;
69 const char *sep = ":";
70 int ret = 0;
71
72 if (BN_num_bytes(bn) <= BN_BYTES)
73 goto err;
74
75 /* Secondary indent is 4 spaces, capped at 128. */
76 if (indent > 124)
77 indent = 124;
78 indent += 4;
79 if (indent < 0)
80 indent = 0;
81
82 if (!bn_bn2hex_nosign(bn, &hex, &hex_len))
83 goto err;
84
85 CBS_init(&cbs, hex, hex_len);
86
87 if (BN_is_negative(bn)) {
88 if (BIO_printf(bio, " (Negative)") <= 0)
89 goto err;
90 }
91
92 while (CBS_len(&cbs) > 0) {
93 if (!CBS_get_u8(&cbs, &hi))
94 goto err;
95 if (!CBS_get_u8(&cbs, &lo))
96 goto err;
97 if (octets++ % 15 == 0) {
98 if (BIO_printf(bio, "\n%*s", indent, "") <= 0)
99 goto err;
100 }
101 /* First nibble has the high bit set. Insert leading 0 octet. */
102 if (octets == 1 && hi >= '8') {
103 if (BIO_printf(bio, "00:") <= 0)
104 goto err;
105 octets++;
106 }
107 if (CBS_len(&cbs) == 0)
108 sep = "";
109 if (BIO_printf(bio, "%c%c%s", tolower(hi), tolower(lo), sep) <= 0)
110 goto err;
111 }
112
113 if (BIO_printf(bio, "\n") <= 0)
114 goto err;
115
116 ret = 1;
117
118 err:
119 freezero(hex, hex_len);
120
121 return ret;
122}
123
124int
125bn_printf(BIO *bio, const BIGNUM *bn, int indent, const char *fmt, ...)
126{
127 va_list ap;
128 int rv;
129
130 if (bn == NULL)
131 return 1;
132
133 if (!BIO_indent(bio, indent, 128))
134 return 0;
135
136 va_start(ap, fmt);
137 rv = BIO_vprintf(bio, fmt, ap);
138 va_end(ap);
139 if (rv < 0)
140 return 0;
141
142 if (BN_is_zero(bn))
143 return bn_print_zero(bio, bn);
144
145 if (BN_num_bytes(bn) <= BN_BYTES)
146 return bn_print_word(bio, bn);
147
148 return bn_print_bignum(bio, bn, indent);
149}
150
151int
152BN_print(BIO *bio, const BIGNUM *bn)
153{
154 char *hex = NULL;
155 size_t hex_len = 0;
156 int ret = 0;
157
158 if (!bn_bn2hex_nibbles(bn, &hex, &hex_len))
159 goto err;
160 if (BIO_printf(bio, "%s", hex) <= 0)
161 goto err;
162
163 ret = 1;
164
165 err:
166 freezero(hex, hex_len);
167
168 return ret;
169}
170LCRYPTO_ALIAS(BN_print);
171
172int
173BN_print_fp(FILE *fp, const BIGNUM *bn)
174{
175 char *hex = NULL;
176 size_t hex_len = 0;
177 int ret = 0;
178
179 if (!bn_bn2hex_nibbles(bn, &hex, &hex_len))
180 goto err;
181 if (fprintf(fp, "%s", hex) < 0)
182 goto err;
183
184 ret = 1;
185
186 err:
187 freezero(hex, hex_len);
188
189 return ret;
190}
191LCRYPTO_ALIAS(BN_print_fp);
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c
deleted file mode 100644
index 9cfcd8e2c0..0000000000
--- a/src/lib/libcrypto/bn/bn_rand.c
+++ /dev/null
@@ -1,340 +0,0 @@
1/* $OpenBSD: bn_rand.c,v 1.30 2024/03/16 20:42:33 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <limits.h>
113#include <stdio.h>
114#include <stdlib.h>
115#include <string.h>
116#include <time.h>
117
118#include <openssl/err.h>
119
120#include "bn_local.h"
121
122static int
123bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
124{
125 unsigned char *buf = NULL;
126 int ret = 0, bit, bytes, mask;
127
128 if (rnd == NULL) {
129 BNerror(ERR_R_PASSED_NULL_PARAMETER);
130 return (0);
131 }
132
133 if (bits < 0 || (bits == 1 && top > 0)) {
134 BNerror(BN_R_BITS_TOO_SMALL);
135 return (0);
136 }
137 if (bits > INT_MAX - 7) {
138 BNerror(BN_R_BIGNUM_TOO_LONG);
139 return (0);
140 }
141
142 if (bits == 0) {
143 BN_zero(rnd);
144 return (1);
145 }
146
147 bytes = (bits + 7) / 8;
148 bit = (bits - 1) % 8;
149 mask = 0xff << (bit + 1);
150
151 buf = malloc(bytes);
152 if (buf == NULL) {
153 BNerror(ERR_R_MALLOC_FAILURE);
154 goto err;
155 }
156
157 /* make a random number and set the top and bottom bits */
158 arc4random_buf(buf, bytes);
159
160#if 1
161 if (pseudorand == 2) {
162 /* generate patterns that are more likely to trigger BN
163 library bugs */
164 int i;
165 unsigned char c;
166
167 for (i = 0; i < bytes; i++) {
168 arc4random_buf(&c, 1);
169 if (c >= 128 && i > 0)
170 buf[i] = buf[i - 1];
171 else if (c < 42)
172 buf[i] = 0;
173 else if (c < 84)
174 buf[i] = 255;
175 }
176 }
177#endif
178
179 if (top > 0) {
180 if (bit == 0) {
181 buf[0] = 1;
182 buf[1] |= 0x80;
183 } else {
184 buf[0] |= (3 << (bit - 1));
185 }
186 }
187 if (top == 0)
188 buf[0] |= (1 << bit);
189 buf[0] &= ~mask;
190 if (bottom) /* set bottom bit if requested */
191 buf[bytes - 1] |= 1;
192 if (BN_bin2bn(buf, bytes, rnd) == NULL)
193 goto err;
194 ret = 1;
195
196err:
197 freezero(buf, bytes);
198 return (ret);
199}
200
201int
202BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
203{
204 return bnrand(0, rnd, bits, top, bottom);
205}
206LCRYPTO_ALIAS(BN_rand);
207
208int
209BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
210{
211 return bnrand(1, rnd, bits, top, bottom);
212}
213LCRYPTO_ALIAS(BN_pseudo_rand);
214
215#if 1
216int
217BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
218{
219 return bnrand(2, rnd, bits, top, bottom);
220}
221#endif
222
223
224/* random number r: 0 <= r < range */
225static int
226bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
227{
228 int (*bn_rand)(BIGNUM *, int, int, int) = pseudo ? BN_pseudo_rand : BN_rand;
229 int n;
230 int count = 100;
231
232 if (range->neg || BN_is_zero(range)) {
233 BNerror(BN_R_INVALID_RANGE);
234 return 0;
235 }
236
237 n = BN_num_bits(range); /* n > 0 */
238
239 /* BN_is_bit_set(range, n - 1) always holds */
240
241 if (n == 1)
242 BN_zero(r);
243 else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3)) {
244 /* range = 100..._2,
245 * so 3*range (= 11..._2) is exactly one bit longer than range */
246 do {
247 if (!bn_rand(r, n + 1, -1, 0))
248 return 0;
249 /* If r < 3*range, use r := r MOD range
250 * (which is either r, r - range, or r - 2*range).
251 * Otherwise, iterate once more.
252 * Since 3*range = 11..._2, each iteration succeeds with
253 * probability >= .75. */
254 if (BN_cmp(r, range) >= 0) {
255 if (!BN_sub(r, r, range))
256 return 0;
257 if (BN_cmp(r, range) >= 0)
258 if (!BN_sub(r, r, range))
259 return 0;
260 }
261
262 if (!--count) {
263 BNerror(BN_R_TOO_MANY_ITERATIONS);
264 return 0;
265 }
266
267 } while (BN_cmp(r, range) >= 0);
268 } else {
269 do {
270 /* range = 11..._2 or range = 101..._2 */
271 if (!bn_rand(r, n, -1, 0))
272 return 0;
273
274 if (!--count) {
275 BNerror(BN_R_TOO_MANY_ITERATIONS);
276 return 0;
277 }
278 } while (BN_cmp(r, range) >= 0);
279 }
280
281 return 1;
282}
283
284int
285BN_rand_range(BIGNUM *r, const BIGNUM *range)
286{
287 return bn_rand_range(0, r, range);
288}
289LCRYPTO_ALIAS(BN_rand_range);
290
291int
292bn_rand_in_range(BIGNUM *rnd, const BIGNUM *lower_inc, const BIGNUM *upper_exc)
293{
294 BIGNUM *len;
295 int ret = 0;
296
297 if ((len = BN_new()) == NULL)
298 goto err;
299 if (!BN_sub(len, upper_exc, lower_inc))
300 goto err;
301 if (!BN_rand_range(rnd, len))
302 goto err;
303 if (!BN_add(rnd, rnd, lower_inc))
304 goto err;
305
306 ret = 1;
307
308 err:
309 BN_free(len);
310
311 return ret;
312}
313
314int
315bn_rand_interval(BIGNUM *rnd, BN_ULONG lower_word, const BIGNUM *upper_exc)
316{
317 BIGNUM *lower_inc = NULL;
318 int ret = 0;
319
320 if ((lower_inc = BN_new()) == NULL)
321 goto err;
322 if (!BN_set_word(lower_inc, lower_word))
323 goto err;
324 if (!bn_rand_in_range(rnd, lower_inc, upper_exc))
325 goto err;
326
327 ret = 1;
328
329 err:
330 BN_free(lower_inc);
331
332 return ret;
333}
334
335int
336BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
337{
338 return bn_rand_range(1, r, range);
339}
340LCRYPTO_ALIAS(BN_pseudo_rand_range);
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c
deleted file mode 100644
index e3f22c52a9..0000000000
--- a/src/lib/libcrypto/bn/bn_recp.c
+++ /dev/null
@@ -1,222 +0,0 @@
1/* $OpenBSD: bn_recp.c,v 1.33 2025/02/04 20:22:20 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60
61#include <openssl/err.h>
62
63#include "bn_local.h"
64
65struct bn_recp_ctx_st {
66 BIGNUM *N; /* the divisor */
67 BIGNUM *Nr; /* the reciprocal 2^shift / N */
68 int num_bits; /* number of bits in N */
69 int shift;
70} /* BN_RECP_CTX */;
71
72BN_RECP_CTX *
73BN_RECP_CTX_create(const BIGNUM *N)
74{
75 BN_RECP_CTX *recp;
76
77 if ((recp = calloc(1, sizeof(*recp))) == NULL)
78 goto err;
79
80 if ((recp->N = BN_dup(N)) == NULL)
81 goto err;
82 BN_set_negative(recp->N, 0);
83 recp->num_bits = BN_num_bits(recp->N);
84
85 if ((recp->Nr = BN_new()) == NULL)
86 goto err;
87
88 return recp;
89
90 err:
91 BN_RECP_CTX_free(recp);
92
93 return NULL;
94}
95
96void
97BN_RECP_CTX_free(BN_RECP_CTX *recp)
98{
99 if (recp == NULL)
100 return;
101
102 BN_free(recp->N);
103 BN_free(recp->Nr);
104 freezero(recp, sizeof(*recp));
105}
106
107int
108BN_div_reciprocal(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, BN_RECP_CTX *recp,
109 BN_CTX *ctx)
110{
111 int i, j, ret = 0;
112 BIGNUM *a, *b, *d, *r;
113
114 if (BN_ucmp(m, recp->N) < 0) {
115 if (dv != NULL)
116 BN_zero(dv);
117 if (rem != NULL)
118 return bn_copy(rem, m);
119 return 1;
120 }
121
122 BN_CTX_start(ctx);
123 if ((a = BN_CTX_get(ctx)) == NULL)
124 goto err;
125 if ((b = BN_CTX_get(ctx)) == NULL)
126 goto err;
127
128 if ((d = dv) == NULL)
129 d = BN_CTX_get(ctx);
130 if (d == NULL)
131 goto err;
132
133 if ((r = rem) == NULL)
134 r = BN_CTX_get(ctx);
135 if (r == NULL)
136 goto err;
137
138 /*
139 * We want the remainder. Given input of ABCDEF / ab we need to
140 * multiply ABCDEF by 3 digits of the reciprocal of ab.
141 */
142
143 /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
144 i = BN_num_bits(m);
145 j = recp->num_bits << 1;
146 if (j > i)
147 i = j;
148
149 /* Compute Nr := (1 << i) / N if necessary. */
150 if (i != recp->shift) {
151 BN_zero(recp->Nr);
152 if (!BN_set_bit(recp->Nr, i))
153 goto err;
154 if (!BN_div_ct(recp->Nr, NULL, recp->Nr, recp->N, ctx))
155 goto err;
156 recp->shift = i;
157 }
158
159 /*
160 * d := |((m >> BN_num_bits(N)) * recp->Nr) >> (i - BN_num_bits(N))|
161 * = |((m >> BN_num_bits(N)) * (1 << i) / N) >> (i - BN_num_bits(N))|
162 * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * 2^BN_num_bits(N) / 2^i |
163 * = |m / N|
164 */
165 if (!BN_rshift(a, m, recp->num_bits))
166 goto err;
167 if (!BN_mul(b, a, recp->Nr, ctx))
168 goto err;
169 if (!BN_rshift(d, b, i - recp->num_bits))
170 goto err;
171 d->neg = 0;
172
173 if (!BN_mul(b, recp->N, d, ctx))
174 goto err;
175 if (!BN_usub(r, m, b))
176 goto err;
177 r->neg = 0;
178
179#if 1
180 j = 0;
181 while (BN_ucmp(r, recp->N) >= 0) {
182 if (j++ > 2) {
183 BNerror(BN_R_BAD_RECIPROCAL);
184 goto err;
185 }
186 if (!BN_usub(r, r, recp->N))
187 goto err;
188 if (!BN_add_word(d, 1))
189 goto err;
190 }
191#endif
192
193 BN_set_negative(r, m->neg);
194 BN_set_negative(d, m->neg ^ recp->N->neg);
195
196 ret = 1;
197
198err:
199 BN_CTX_end(ctx);
200 return ret;
201}
202
203/* Compute r = (x * y) % m. */
204int
205BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
206 BN_RECP_CTX *recp, BN_CTX *ctx)
207{
208 if (!BN_mul(r, x, y, ctx))
209 return 0;
210
211 return BN_div_reciprocal(NULL, r, r, recp, ctx);
212}
213
214/* Compute r = x^2 % m. */
215int
216BN_mod_sqr_reciprocal(BIGNUM *r, const BIGNUM *x, BN_RECP_CTX *recp, BN_CTX *ctx)
217{
218 if (!BN_sqr(r, x, ctx))
219 return 0;
220
221 return BN_div_reciprocal(NULL, r, r, recp, ctx);
222}
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c
deleted file mode 100644
index 12edc7c0a0..0000000000
--- a/src/lib/libcrypto/bn/bn_shift.c
+++ /dev/null
@@ -1,175 +0,0 @@
1/* $OpenBSD: bn_shift.c,v 1.22 2023/07/08 12:21:58 beck Exp $ */
2/*
3 * Copyright (c) 2022, 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19#include <openssl/err.h>
20
21#include "bn_local.h"
22
23static inline int
24bn_lshift(BIGNUM *r, const BIGNUM *a, int n)
25{
26 size_t count, shift_bits, shift_words;
27 size_t lshift, rshift;
28 ssize_t rstride;
29 BN_ULONG *dst, *src;
30
31 if (n < 0) {
32 BNerror(BN_R_INVALID_LENGTH);
33 return 0;
34 }
35 shift_bits = n;
36
37 /*
38 * Left bit shift, potentially across word boundaries.
39 *
40 * When shift is not an exact multiple of BN_BITS2, the bottom bits of
41 * the previous word need to be right shifted and combined with the left
42 * shifted bits using bitwise OR. If shift is an exact multiple of
43 * BN_BITS2, the source for the left and right shifts are the same
44 * and the shifts become zero bits (which is effectively a memmove).
45 */
46 shift_words = shift_bits / BN_BITS2;
47 lshift = shift_bits % BN_BITS2;
48 rshift = (BN_BITS2 - lshift) % BN_BITS2;
49 rstride = 0 - (lshift + rshift) / BN_BITS2;
50
51 if (a->top < 1) {
52 BN_zero(r);
53 return 1;
54 }
55
56 count = a->top + shift_words + 1;
57
58 if (count < shift_words)
59 return 0;
60
61 if (!bn_wexpand(r, count))
62 return 0;
63
64 src = a->d + a->top - 1;
65 dst = r->d + a->top + shift_words;
66
67 /* Handle right shift for top most word. */
68 *dst = (*src >> rshift) & rstride;
69 dst--;
70
71 /* Handle left shift and right shift for remaining words. */
72 while (src > a->d) {
73 *dst = *src << lshift | src[rstride] >> rshift;
74 src--;
75 dst--;
76 }
77 *dst = *src << lshift;
78
79 /* Zero any additional words resulting from the left shift. */
80 while (dst > r->d) {
81 dst--;
82 *dst = 0;
83 }
84
85 r->top = count;
86 bn_correct_top(r);
87
88 BN_set_negative(r, a->neg);
89
90 return 1;
91}
92
93static inline int
94bn_rshift(BIGNUM *r, const BIGNUM *a, int n)
95{
96 size_t count, shift_bits, shift_words;
97 size_t lshift, rshift;
98 ssize_t lstride;
99 BN_ULONG *dst, *src;
100 size_t i;
101
102 if (n < 0) {
103 BNerror(BN_R_INVALID_LENGTH);
104 return 0;
105 }
106 shift_bits = n;
107
108 /*
109 * Right bit shift, potentially across word boundaries.
110 *
111 * When shift is not an exact multiple of BN_BITS2, the top bits of
112 * the next word need to be left shifted and combined with the right
113 * shifted bits using bitwise OR. If shift is an exact multiple of
114 * BN_BITS2, the source for the left and right shifts are the same
115 * and the shifts become zero (which is effectively a memmove).
116 */
117 shift_words = shift_bits / BN_BITS2;
118 rshift = shift_bits % BN_BITS2;
119 lshift = (BN_BITS2 - rshift) % BN_BITS2;
120 lstride = (lshift + rshift) / BN_BITS2;
121
122 if (a->top <= shift_words) {
123 BN_zero(r);
124 return 1;
125 }
126 count = a->top - shift_words;
127
128 if (!bn_wexpand(r, count))
129 return 0;
130
131 src = a->d + shift_words;
132 dst = r->d;
133
134 for (i = 1; i < count; i++) {
135 *dst = src[lstride] << lshift | *src >> rshift;
136 src++;
137 dst++;
138 }
139 *dst = *src >> rshift;
140
141 r->top = count;
142 bn_correct_top(r);
143
144 BN_set_negative(r, a->neg);
145
146 return 1;
147}
148
149int
150BN_lshift1(BIGNUM *r, const BIGNUM *a)
151{
152 return bn_lshift(r, a, 1);
153}
154LCRYPTO_ALIAS(BN_lshift1);
155
156int
157BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
158{
159 return bn_lshift(r, a, n);
160}
161LCRYPTO_ALIAS(BN_lshift);
162
163int
164BN_rshift1(BIGNUM *r, const BIGNUM *a)
165{
166 return bn_rshift(r, a, 1);
167}
168LCRYPTO_ALIAS(BN_rshift1);
169
170int
171BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
172{
173 return bn_rshift(r, a, n);
174}
175LCRYPTO_ALIAS(BN_rshift);
diff --git a/src/lib/libcrypto/bn/bn_small_primes.c b/src/lib/libcrypto/bn/bn_small_primes.c
deleted file mode 100644
index bfb7903a54..0000000000
--- a/src/lib/libcrypto/bn/bn_small_primes.c
+++ /dev/null
@@ -1,265 +0,0 @@
1/* $OpenBSD: bn_small_primes.c,v 1.1 2022/11/09 22:52:51 tb Exp $ */
2/*
3 * Public domain, generated by bn_prime.pl.
4 */
5
6#include "bn_prime.h"
7
8const uint16_t primes[NUMPRIMES] = {
9 2, 3, 5, 7, 11, 13, 17, 19,
10 23, 29, 31, 37, 41, 43, 47, 53,
11 59, 61, 67, 71, 73, 79, 83, 89,
12 97, 101, 103, 107, 109, 113, 127, 131,
13 137, 139, 149, 151, 157, 163, 167, 173,
14 179, 181, 191, 193, 197, 199, 211, 223,
15 227, 229, 233, 239, 241, 251, 257, 263,
16 269, 271, 277, 281, 283, 293, 307, 311,
17 313, 317, 331, 337, 347, 349, 353, 359,
18 367, 373, 379, 383, 389, 397, 401, 409,
19 419, 421, 431, 433, 439, 443, 449, 457,
20 461, 463, 467, 479, 487, 491, 499, 503,
21 509, 521, 523, 541, 547, 557, 563, 569,
22 571, 577, 587, 593, 599, 601, 607, 613,
23 617, 619, 631, 641, 643, 647, 653, 659,
24 661, 673, 677, 683, 691, 701, 709, 719,
25 727, 733, 739, 743, 751, 757, 761, 769,
26 773, 787, 797, 809, 811, 821, 823, 827,
27 829, 839, 853, 857, 859, 863, 877, 881,
28 883, 887, 907, 911, 919, 929, 937, 941,
29 947, 953, 967, 971, 977, 983, 991, 997,
30 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049,
31 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097,
32 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163,
33 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223,
34 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283,
35 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321,
36 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423,
37 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459,
38 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511,
39 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571,
40 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619,
41 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693,
42 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747,
43 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811,
44 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877,
45 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949,
46 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003,
47 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069,
48 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129,
49 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203,
50 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267,
51 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311,
52 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377,
53 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423,
54 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503,
55 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579,
56 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657,
57 2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693,
58 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741,
59 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801,
60 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861,
61 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939,
62 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011,
63 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079,
64 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167,
65 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221,
66 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301,
67 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347,
68 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413,
69 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491,
70 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541,
71 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607,
72 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671,
73 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727,
74 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797,
75 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863,
76 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923,
77 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003,
78 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057,
79 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129,
80 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211,
81 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259,
82 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337,
83 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409,
84 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481,
85 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547,
86 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621,
87 4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673,
88 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751,
89 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813,
90 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909,
91 4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967,
92 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011,
93 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087,
94 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167,
95 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233,
96 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309,
97 5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399,
98 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443,
99 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507,
100 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573,
101 5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653,
102 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711,
103 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791,
104 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849,
105 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897,
106 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007,
107 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073,
108 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133,
109 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211,
110 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271,
111 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329,
112 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379,
113 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473,
114 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563,
115 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637,
116 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701,
117 6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779,
118 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833,
119 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907,
120 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971,
121 6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027,
122 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121,
123 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207,
124 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253,
125 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349,
126 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457,
127 7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517,
128 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561,
129 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621,
130 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691,
131 7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757,
132 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853,
133 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919,
134 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009,
135 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087,
136 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161,
137 8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231,
138 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291,
139 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369,
140 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443,
141 8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537,
142 8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609,
143 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677,
144 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731,
145 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803,
146 8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861,
147 8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941,
148 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011,
149 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091,
150 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161,
151 9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227,
152 9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311,
153 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377,
154 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433,
155 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491,
156 9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587,
157 9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649,
158 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733,
159 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791,
160 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857,
161 9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929,
162 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037,
163 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099,
164 10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163,
165 10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247,
166 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303,
167 10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369,
168 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459,
169 10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531,
170 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627,
171 10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691,
172 10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771,
173 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859,
174 10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937,
175 10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003,
176 11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087,
177 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161,
178 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251,
179 11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317,
180 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399,
181 11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483,
182 11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551,
183 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657,
184 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731,
185 11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813,
186 11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887,
187 11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941,
188 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011,
189 12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101,
190 12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161,
191 12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251,
192 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323,
193 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401,
194 12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473,
195 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527,
196 12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589,
197 12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653,
198 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739,
199 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821,
200 12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907,
201 12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967,
202 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033,
203 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109,
204 13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177,
205 13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259,
206 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337,
207 13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421,
208 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499,
209 13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597,
210 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681,
211 13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723,
212 13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799,
213 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879,
214 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933,
215 13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033,
216 14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143,
217 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221,
218 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323,
219 14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407,
220 14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461,
221 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549,
222 14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627,
223 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699,
224 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753,
225 14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821,
226 14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887,
227 14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957,
228 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073,
229 15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137,
230 15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217,
231 15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277,
232 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331,
233 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401,
234 15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473,
235 15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569,
236 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643,
237 15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727,
238 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773,
239 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859,
240 15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919,
241 15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007,
242 16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087,
243 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183,
244 16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249,
245 16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349,
246 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427,
247 16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493,
248 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603,
249 16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661,
250 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747,
251 16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843,
252 16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927,
253 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993,
254 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053,
255 17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159,
256 17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231,
257 17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327,
258 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389,
259 17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467,
260 17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519,
261 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599,
262 17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683,
263 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783,
264 17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863,
265};
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
deleted file mode 100644
index 0dbccbf85d..0000000000
--- a/src/lib/libcrypto/bn/bn_sqr.c
+++ /dev/null
@@ -1,305 +0,0 @@
1/* $OpenBSD: bn_sqr.c,v 1.36 2023/07/08 12:21:58 beck Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <assert.h>
60#include <stdio.h>
61#include <string.h>
62
63#include "bn_arch.h"
64#include "bn_local.h"
65#include "bn_internal.h"
66
67int bn_sqr(BIGNUM *r, const BIGNUM *a, int max, BN_CTX *ctx);
68
69/*
70 * bn_sqr_comba4() computes r[] = a[] * a[] using Comba multiplication
71 * (https://everything2.com/title/Comba+multiplication), where a is a
72 * four word array, producing an eight word array result.
73 */
74#ifndef HAVE_BN_SQR_COMBA4
75void
76bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
77{
78 BN_ULONG c2, c1, c0;
79
80 bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]);
81
82 bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]);
83
84 bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0);
85 bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]);
86
87 bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0);
88 bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]);
89
90 bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0);
91 bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &r[4]);
92
93 bn_mul2_mulw_addtw(a[3], a[2], 0, c2, c1, &c2, &c1, &r[5]);
94
95 bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &r[7], &r[6]);
96}
97#endif
98
99/*
100 * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication
101 * (https://everything2.com/title/Comba+multiplication), where a is an
102 * eight word array, producing a 16 word array result.
103 */
104#ifndef HAVE_BN_SQR_COMBA8
105void
106bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
107{
108 BN_ULONG c2, c1, c0;
109
110 bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]);
111
112 bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]);
113
114 bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0);
115 bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]);
116
117 bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0);
118 bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]);
119
120 bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0);
121 bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &c0);
122 bn_mul2_mulw_addtw(a[4], a[0], c2, c1, c0, &c2, &c1, &r[4]);
123
124 bn_mul2_mulw_addtw(a[5], a[0], 0, c2, c1, &c2, &c1, &c0);
125 bn_mul2_mulw_addtw(a[4], a[1], c2, c1, c0, &c2, &c1, &c0);
126 bn_mul2_mulw_addtw(a[3], a[2], c2, c1, c0, &c2, &c1, &r[5]);
127
128 bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &c1, &c0);
129 bn_mul2_mulw_addtw(a[4], a[2], c2, c1, c0, &c2, &c1, &c0);
130 bn_mul2_mulw_addtw(a[5], a[1], c2, c1, c0, &c2, &c1, &c0);
131 bn_mul2_mulw_addtw(a[6], a[0], c2, c1, c0, &c2, &c1, &r[6]);
132
133 bn_mul2_mulw_addtw(a[7], a[0], 0, c2, c1, &c2, &c1, &c0);
134 bn_mul2_mulw_addtw(a[6], a[1], c2, c1, c0, &c2, &c1, &c0);
135 bn_mul2_mulw_addtw(a[5], a[2], c2, c1, c0, &c2, &c1, &c0);
136 bn_mul2_mulw_addtw(a[4], a[3], c2, c1, c0, &c2, &c1, &r[7]);
137
138 bn_mulw_addtw(a[4], a[4], 0, c2, c1, &c2, &c1, &c0);
139 bn_mul2_mulw_addtw(a[5], a[3], c2, c1, c0, &c2, &c1, &c0);
140 bn_mul2_mulw_addtw(a[6], a[2], c2, c1, c0, &c2, &c1, &c0);
141 bn_mul2_mulw_addtw(a[7], a[1], c2, c1, c0, &c2, &c1, &r[8]);
142
143 bn_mul2_mulw_addtw(a[7], a[2], 0, c2, c1, &c2, &c1, &c0);
144 bn_mul2_mulw_addtw(a[6], a[3], c2, c1, c0, &c2, &c1, &c0);
145 bn_mul2_mulw_addtw(a[5], a[4], c2, c1, c0, &c2, &c1, &r[9]);
146
147 bn_mulw_addtw(a[5], a[5], 0, c2, c1, &c2, &c1, &c0);
148 bn_mul2_mulw_addtw(a[6], a[4], c2, c1, c0, &c2, &c1, &c0);
149 bn_mul2_mulw_addtw(a[7], a[3], c2, c1, c0, &c2, &c1, &r[10]);
150
151 bn_mul2_mulw_addtw(a[7], a[4], 0, c2, c1, &c2, &c1, &c0);
152 bn_mul2_mulw_addtw(a[6], a[5], c2, c1, c0, &c2, &c1, &r[11]);
153
154 bn_mulw_addtw(a[6], a[6], 0, c2, c1, &c2, &c1, &c0);
155 bn_mul2_mulw_addtw(a[7], a[5], c2, c1, c0, &c2, &c1, &r[12]);
156
157 bn_mul2_mulw_addtw(a[7], a[6], 0, c2, c1, &c2, &c1, &r[13]);
158
159 bn_mulw_addtw(a[7], a[7], 0, c2, c1, &c2, &r[15], &r[14]);
160}
161#endif
162
163#ifndef HAVE_BN_SQR
164/*
165 * bn_sqr_add_words() computes (r[i*2+1]:r[i*2]) = (r[i*2+1]:r[i*2]) + a[i] * a[i].
166 */
167static void
168bn_sqr_add_words(BN_ULONG *r, const BN_ULONG *a, int n)
169{
170 BN_ULONG x3, x2, x1, x0;
171 BN_ULONG carry = 0;
172
173 assert(n >= 0);
174 if (n <= 0)
175 return;
176
177 while (n & ~3) {
178 bn_mulw(a[0], a[0], &x1, &x0);
179 bn_mulw(a[1], a[1], &x3, &x2);
180 bn_qwaddqw(x3, x2, x1, x0, r[3], r[2], r[1], r[0], carry,
181 &carry, &r[3], &r[2], &r[1], &r[0]);
182 bn_mulw(a[2], a[2], &x1, &x0);
183 bn_mulw(a[3], a[3], &x3, &x2);
184 bn_qwaddqw(x3, x2, x1, x0, r[7], r[6], r[5], r[4], carry,
185 &carry, &r[7], &r[6], &r[5], &r[4]);
186
187 a += 4;
188 r += 8;
189 n -= 4;
190 }
191 while (n) {
192 bn_mulw_addw_addw(a[0], a[0], r[0], carry, &carry, &r[0]);
193 bn_addw(r[1], carry, &carry, &r[1]);
194 a++;
195 r += 2;
196 n--;
197 }
198}
199
200static void
201bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len)
202{
203 const BN_ULONG *ap;
204 BN_ULONG *rp;
205 BN_ULONG w;
206 int n;
207
208 if (a_len <= 0)
209 return;
210
211 ap = a;
212 w = ap[0];
213 ap++;
214
215 rp = r;
216 rp[0] = rp[r_len - 1] = 0;
217 rp++;
218
219 /* Compute initial product - r[n:1] = a[n:1] * a[0] */
220 n = a_len - 1;
221 if (n > 0) {
222 rp[n] = bn_mul_words(rp, ap, n, w);
223 }
224 rp += 2;
225 n--;
226
227 /* Compute and sum remaining products. */
228 while (n > 0) {
229 w = ap[0];
230 ap++;
231
232 rp[n] = bn_mul_add_words(rp, ap, n, w);
233 rp += 2;
234 n--;
235 }
236
237 /* Double the sum of products. */
238 bn_add_words(r, r, r, r_len);
239
240 /* Add squares. */
241 bn_sqr_add_words(r, a, a_len);
242}
243
244/*
245 * bn_sqr() computes a * a, storing the result in r. The caller must ensure that
246 * r is not the same BIGNUM as a and that r has been expanded to rn = a->top * 2
247 * words.
248 */
249int
250bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx)
251{
252 bn_sqr_normal(r->d, r_len, a->d, a->top);
253
254 return 1;
255}
256#endif
257
258int
259BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
260{
261 BIGNUM *rr;
262 int r_len;
263 int ret = 1;
264
265 BN_CTX_start(ctx);
266
267 if (a->top < 1) {
268 BN_zero(r);
269 goto done;
270 }
271
272 if ((rr = r) == a)
273 rr = BN_CTX_get(ctx);
274 if (rr == NULL)
275 goto err;
276
277 if ((r_len = a->top * 2) < a->top)
278 goto err;
279 if (!bn_wexpand(rr, r_len))
280 goto err;
281
282 if (a->top == 4) {
283 bn_sqr_comba4(rr->d, a->d);
284 } else if (a->top == 8) {
285 bn_sqr_comba8(rr->d, a->d);
286 } else {
287 if (!bn_sqr(rr, a, r_len, ctx))
288 goto err;
289 }
290
291 rr->top = r_len;
292 bn_correct_top(rr);
293
294 rr->neg = 0;
295
296 if (!bn_copy(r, rr))
297 goto err;
298 done:
299 ret = 1;
300 err:
301 BN_CTX_end(ctx);
302
303 return ret;
304}
305LCRYPTO_ALIAS(BN_sqr);
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c
deleted file mode 100644
index a82b911e67..0000000000
--- a/src/lib/libcrypto/bn/bn_word.c
+++ /dev/null
@@ -1,245 +0,0 @@
1/* $OpenBSD: bn_word.c,v 1.21 2023/07/08 12:21:58 beck Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60
61#include "bn_local.h"
62
63BN_ULONG
64BN_mod_word(const BIGNUM *a, BN_ULONG w)
65{
66#ifndef BN_LLONG
67 BN_ULONG ret = 0;
68#else
69 BN_ULLONG ret = 0;
70#endif
71 int i;
72
73 if (w == 0)
74 return (BN_ULONG) - 1;
75
76#ifndef BN_ULLONG
77 /* If |w| is too long and we don't have |BN_ULLONG| then we need to fall back
78 * to using |BN_div_word|. */
79 if (w > ((BN_ULONG)1 << BN_BITS4)) {
80 BIGNUM *tmp = BN_dup(a);
81 if (tmp == NULL) {
82 return (BN_ULONG)-1;
83 }
84 ret = BN_div_word(tmp, w);
85 BN_free(tmp);
86 return ret;
87 }
88#endif
89
90 w &= BN_MASK2;
91 for (i = a->top - 1; i >= 0; i--) {
92#ifndef BN_LLONG
93 ret = ((ret << BN_BITS4) | ((a->d[i] >> BN_BITS4) &
94 BN_MASK2l)) % w;
95 ret = ((ret << BN_BITS4) | (a->d[i] & BN_MASK2l)) % w;
96#else
97 ret = (BN_ULLONG)(((ret << (BN_ULLONG)BN_BITS2) |
98 a->d[i]) % (BN_ULLONG)w);
99#endif
100 }
101 return ((BN_ULONG)ret);
102}
103LCRYPTO_ALIAS(BN_mod_word);
104
105BN_ULONG
106BN_div_word(BIGNUM *a, BN_ULONG w)
107{
108 BN_ULONG ret = 0;
109 int i, j;
110
111 w &= BN_MASK2;
112
113 if (!w)
114 /* actually this an error (division by zero) */
115 return (BN_ULONG) - 1;
116 if (a->top == 0)
117 return 0;
118
119 /* normalize input (so bn_div_words doesn't complain) */
120 j = BN_BITS2 - BN_num_bits_word(w);
121 w <<= j;
122 if (!BN_lshift(a, a, j))
123 return (BN_ULONG) - 1;
124
125 for (i = a->top - 1; i >= 0; i--) {
126 BN_ULONG l, d;
127
128 l = a->d[i];
129 bn_div_rem_words(ret, l, w, &d, &ret);
130 a->d[i] = d;
131 }
132 if ((a->top > 0) && (a->d[a->top - 1] == 0))
133 a->top--;
134 ret >>= j;
135
136 /* Set negative again, to handle -0 case. */
137 BN_set_negative(a, a->neg);
138
139 return (ret);
140}
141LCRYPTO_ALIAS(BN_div_word);
142
143int
144BN_add_word(BIGNUM *a, BN_ULONG w)
145{
146 BN_ULONG l;
147 int i;
148
149 w &= BN_MASK2;
150
151 /* degenerate case: w is zero */
152 if (!w)
153 return 1;
154 /* degenerate case: a is zero */
155 if (BN_is_zero(a))
156 return BN_set_word(a, w);
157 /* handle 'a' when negative */
158 if (a->neg) {
159 a->neg = 0;
160 i = BN_sub_word(a, w);
161 BN_set_negative(a, !a->neg);
162 return (i);
163 }
164 for (i = 0; w != 0 && i < a->top; i++) {
165 a->d[i] = l = (a->d[i] + w) & BN_MASK2;
166 w = (w > l) ? 1 : 0;
167 }
168 if (w && i == a->top) {
169 if (!bn_wexpand(a, a->top + 1))
170 return 0;
171 a->top++;
172 a->d[i] = w;
173 }
174 return (1);
175}
176LCRYPTO_ALIAS(BN_add_word);
177
178int
179BN_sub_word(BIGNUM *a, BN_ULONG w)
180{
181 int i;
182
183 w &= BN_MASK2;
184
185 /* degenerate case: w is zero */
186 if (!w)
187 return 1;
188 /* degenerate case: a is zero */
189 if (BN_is_zero(a)) {
190 i = BN_set_word(a, w);
191 if (i != 0)
192 BN_set_negative(a, 1);
193 return i;
194 }
195 /* handle 'a' when negative */
196 if (a->neg) {
197 a->neg = 0;
198 i = BN_add_word(a, w);
199 BN_set_negative(a, !a->neg);
200 return (i);
201 }
202
203 if ((a->top == 1) && (a->d[0] < w)) {
204 a->d[0] = w - a->d[0];
205 BN_set_negative(a, 1);
206 return (1);
207 }
208 i = 0;
209 for (;;) {
210 if (a->d[i] >= w) {
211 a->d[i] -= w;
212 break;
213 } else {
214 a->d[i] = (a->d[i] - w) & BN_MASK2;
215 i++;
216 w = 1;
217 }
218 }
219 if ((a->d[i] == 0) && (i == (a->top - 1)))
220 a->top--;
221 return (1);
222}
223LCRYPTO_ALIAS(BN_sub_word);
224
225int
226BN_mul_word(BIGNUM *a, BN_ULONG w)
227{
228 BN_ULONG ll;
229
230 w &= BN_MASK2;
231 if (a->top) {
232 if (w == 0)
233 BN_zero(a);
234 else {
235 ll = bn_mul_words(a->d, a->d, a->top, w);
236 if (ll) {
237 if (!bn_wexpand(a, a->top + 1))
238 return (0);
239 a->d[a->top++] = ll;
240 }
241 }
242 }
243 return (1);
244}
245LCRYPTO_ALIAS(BN_mul_word);
diff --git a/src/lib/libcrypto/bn/s2n_bignum.h b/src/lib/libcrypto/bn/s2n_bignum.h
deleted file mode 100644
index ce6e8cdc94..0000000000
--- a/src/lib/libcrypto/bn/s2n_bignum.h
+++ /dev/null
@@ -1,856 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// C prototypes for s2n-bignum functions, so you can use them in C programs via
17//
18// #include "s2n-bignum.h"
19//
20// The functions are listed in alphabetical order with a brief description
21// in comments for each one. For more detailed documentation see the comment
22// banner at the top of the corresponding assembly (.S) file, and
23// for the last word in what properties it satisfies see the spec in the
24// formal proof (the .ml file in the architecture-specific directory).
25//
26// For some functions there are additional variants with names ending in
27// "_alt". These have the same core mathematical functionality as their
28// non-"alt" versions, but can be better suited to some microarchitectures:
29//
30// - On x86, the "_alt" forms avoid BMI and ADX instruction set
31// extensions, so will run on any x86_64 machine, even older ones
32//
33// - On ARM, the "_alt" forms target machines with higher multiplier
34// throughput, generally offering higher performance there.
35// ----------------------------------------------------------------------------
36
37// Add, z := x + y
38// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
39extern uint64_t bignum_add (uint64_t p, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
40
41// Add modulo p_25519, z := (x + y) mod p_25519, assuming x and y reduced
42// Inputs x[4], y[4]; output z[4]
43extern void bignum_add_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
44
45// Add modulo p_256, z := (x + y) mod p_256, assuming x and y reduced
46// Inputs x[4], y[4]; output z[4]
47extern void bignum_add_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
48
49// Add modulo p_256k1, z := (x + y) mod p_256k1, assuming x and y reduced
50// Inputs x[4], y[4]; output z[4]
51extern void bignum_add_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
52
53// Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced
54// Inputs x[6], y[6]; output z[6]
55extern void bignum_add_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]);
56
57// Add modulo p_521, z := (x + y) mod p_521, assuming x and y reduced
58// Inputs x[9], y[9]; output z[9]
59extern void bignum_add_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]);
60
61// Compute "amontification" constant z :== 2^{128k} (congruent mod m)
62// Input m[k]; output z[k]; temporary buffer t[>=k]
63extern void bignum_amontifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t);
64
65// Almost-Montgomery multiply, z :== (x * y / 2^{64k}) (congruent mod m)
66// Inputs x[k], y[k], m[k]; output z[k]
67extern void bignum_amontmul (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m);
68
69// Almost-Montgomery reduce, z :== (x' / 2^{64p}) (congruent mod m)
70// Inputs x[n], m[k], p; output z[k]
71extern void bignum_amontredc (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t *m, uint64_t p);
72
73// Almost-Montgomery square, z :== (x^2 / 2^{64k}) (congruent mod m)
74// Inputs x[k], m[k]; output z[k]
75extern void bignum_amontsqr (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m);
76
77// Convert 4-digit (256-bit) bignum to/from big-endian form
78// Input x[4]; output z[4]
79extern void bignum_bigendian_4 (uint64_t z[static 4], uint64_t x[static 4]);
80
81// Convert 6-digit (384-bit) bignum to/from big-endian form
82// Input x[6]; output z[6]
83extern void bignum_bigendian_6 (uint64_t z[static 6], uint64_t x[static 6]);
84
85// Select bitfield starting at bit n with length l <= 64
86// Inputs x[k], n, l; output function return
87extern uint64_t bignum_bitfield (uint64_t k, uint64_t *x, uint64_t n, uint64_t l);
88
89// Return size of bignum in bits
90// Input x[k]; output function return
91extern uint64_t bignum_bitsize (uint64_t k, uint64_t *x);
92
93// Divide by a single (nonzero) word, z := x / m and return x mod m
94// Inputs x[n], m; outputs function return (remainder) and z[k]
95extern uint64_t bignum_cdiv (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t m);
96
97// Divide by a single word, z := x / m when known to be exact
98// Inputs x[n], m; output z[k]
99extern void bignum_cdiv_exact (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t m);
100
101// Count leading zero digits (64-bit words)
102// Input x[k]; output function return
103extern uint64_t bignum_cld (uint64_t k, uint64_t *x);
104
105// Count leading zero bits
106// Input x[k]; output function return
107extern uint64_t bignum_clz (uint64_t k, uint64_t *x);
108
109// Multiply-add with single-word multiplier, z := z + c * y
110// Inputs c, y[n]; outputs function return (carry-out) and z[k]
111extern uint64_t bignum_cmadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
112
113// Negated multiply-add with single-word multiplier, z := z - c * y
114// Inputs c, y[n]; outputs function return (negative carry-out) and z[k]
115extern uint64_t bignum_cmnegadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
116
117// Find modulus of bignum w.r.t. single nonzero word m, returning x mod m
118// Input x[k], m; output function return
119extern uint64_t bignum_cmod (uint64_t k, uint64_t *x, uint64_t m);
120
121// Multiply by a single word, z := c * y
122// Inputs c, y[n]; outputs function return (carry-out) and z[k]
123extern uint64_t bignum_cmul (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
124
125// Multiply by a single word modulo p_25519, z := (c * x) mod p_25519, assuming x reduced
126// Inputs c, x[4]; output z[4]
127extern void bignum_cmul_p25519 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]);
128extern void bignum_cmul_p25519_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]);
129
130// Multiply by a single word modulo p_256, z := (c * x) mod p_256, assuming x reduced
131// Inputs c, x[4]; output z[4]
132extern void bignum_cmul_p256 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]);
133extern void bignum_cmul_p256_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]);
134
135// Multiply by a single word modulo p_256k1, z := (c * x) mod p_256k1, assuming x reduced
136// Inputs c, x[4]; output z[4]
137extern void bignum_cmul_p256k1 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]);
138extern void bignum_cmul_p256k1_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]);
139
140// Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming x reduced
141// Inputs c, x[6]; output z[6]
142extern void bignum_cmul_p384 (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]);
143extern void bignum_cmul_p384_alt (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]);
144
145// Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming x reduced
146// Inputs c, x[9]; output z[9]
147extern void bignum_cmul_p521 (uint64_t z[static 9], uint64_t c, uint64_t x[static 9]);
148extern void bignum_cmul_p521_alt (uint64_t z[static 9], uint64_t c, uint64_t x[static 9]);
149
150// Test bignums for coprimality, gcd(x,y) = 1
151// Inputs x[m], y[n]; output function return; temporary buffer t[>=2*max(m,n)]
152extern uint64_t bignum_coprime (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y, uint64_t *t);
153
154// Copy bignum with zero-extension or truncation, z := x
155// Input x[n]; output z[k]
156extern void bignum_copy (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x);
157
158// Count trailing zero digits (64-bit words)
159// Input x[k]; output function return
160extern uint64_t bignum_ctd (uint64_t k, uint64_t *x);
161
162// Count trailing zero bits
163// Input x[k]; output function return
164extern uint64_t bignum_ctz (uint64_t k, uint64_t *x);
165
166// Convert from almost-Montgomery form, z := (x / 2^256) mod p_256
167// Input x[4]; output z[4]
168extern void bignum_deamont_p256 (uint64_t z[static 4], uint64_t x[static 4]);
169extern void bignum_deamont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]);
170
171// Convert from almost-Montgomery form, z := (x / 2^256) mod p_256k1
172// Input x[4]; output z[4]
173extern void bignum_deamont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
174
175// Convert from almost-Montgomery form, z := (x / 2^384) mod p_384
176// Input x[6]; output z[6]
177extern void bignum_deamont_p384 (uint64_t z[static 6], uint64_t x[static 6]);
178extern void bignum_deamont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]);
179
180// Convert from almost-Montgomery form z := (x / 2^576) mod p_521
181// Input x[9]; output z[9]
182extern void bignum_deamont_p521 (uint64_t z[static 9], uint64_t x[static 9]);
183
184// Convert from (almost-)Montgomery form z := (x / 2^{64k}) mod m
185// Inputs x[k], m[k]; output z[k]
186extern void bignum_demont (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m);
187
188// Convert from Montgomery form z := (x / 2^256) mod p_256, assuming x reduced
189// Input x[4]; output z[4]
190extern void bignum_demont_p256 (uint64_t z[static 4], uint64_t x[static 4]);
191extern void bignum_demont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]);
192
193// Convert from Montgomery form z := (x / 2^256) mod p_256k1, assuming x reduced
194// Input x[4]; output z[4]
195extern void bignum_demont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
196
197// Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced
198// Input x[6]; output z[6]
199extern void bignum_demont_p384 (uint64_t z[static 6], uint64_t x[static 6]);
200extern void bignum_demont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]);
201
202// Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced
203// Input x[9]; output z[9]
204extern void bignum_demont_p521 (uint64_t z[static 9], uint64_t x[static 9]);
205
206// Select digit x[n]
207// Inputs x[k], n; output function return
208extern uint64_t bignum_digit (uint64_t k, uint64_t *x, uint64_t n);
209
210// Return size of bignum in digits (64-bit word)
211// Input x[k]; output function return
212extern uint64_t bignum_digitsize (uint64_t k, uint64_t *x);
213
214// Divide bignum by 10: z' := z div 10, returning remainder z mod 10
215// Inputs z[k]; outputs function return (remainder) and z[k]
216extern uint64_t bignum_divmod10 (uint64_t k, uint64_t *z);
217
218// Double modulo p_25519, z := (2 * x) mod p_25519, assuming x reduced
219// Input x[4]; output z[4]
220extern void bignum_double_p25519 (uint64_t z[static 4], uint64_t x[static 4]);
221
222// Double modulo p_256, z := (2 * x) mod p_256, assuming x reduced
223// Input x[4]; output z[4]
224extern void bignum_double_p256 (uint64_t z[static 4], uint64_t x[static 4]);
225
226// Double modulo p_256k1, z := (2 * x) mod p_256k1, assuming x reduced
227// Input x[4]; output z[4]
228extern void bignum_double_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
229
230// Double modulo p_384, z := (2 * x) mod p_384, assuming x reduced
231// Input x[6]; output z[6]
232extern void bignum_double_p384 (uint64_t z[static 6], uint64_t x[static 6]);
233
234// Double modulo p_521, z := (2 * x) mod p_521, assuming x reduced
235// Input x[9]; output z[9]
236extern void bignum_double_p521 (uint64_t z[static 9], uint64_t x[static 9]);
237
238// Extended Montgomery reduce, returning results in input-output buffer
239// Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k]
240extern uint64_t bignum_emontredc (uint64_t k, uint64_t *z, uint64_t *m, uint64_t w);
241
242// Extended Montgomery reduce in 8-digit blocks, results in input-output buffer
243// Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k]
244extern uint64_t bignum_emontredc_8n (uint64_t k, uint64_t *z, uint64_t *m, uint64_t w);
245
246// Test bignums for equality, x = y
247// Inputs x[m], y[n]; output function return
248extern uint64_t bignum_eq (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
249
250// Test bignum for even-ness
251// Input x[k]; output function return
252extern uint64_t bignum_even (uint64_t k, uint64_t *x);
253
254// Convert 4-digit (256-bit) bignum from big-endian bytes
255// Input x[32] (bytes); output z[4]
256extern void bignum_frombebytes_4 (uint64_t z[static 4], uint8_t x[static 32]);
257
258// Convert 6-digit (384-bit) bignum from big-endian bytes
259// Input x[48] (bytes); output z[6]
260extern void bignum_frombebytes_6 (uint64_t z[static 6], uint8_t x[static 48]);
261
262// Convert 4-digit (256-bit) bignum from little-endian bytes
263// Input x[32] (bytes); output z[4]
264extern void bignum_fromlebytes_4 (uint64_t z[static 4], uint8_t x[static 32]);
265
266// Convert 6-digit (384-bit) bignum from little-endian bytes
267// Input x[48] (bytes); output z[6]
268extern void bignum_fromlebytes_6 (uint64_t z[static 6], uint8_t x[static 48]);
269
270// Convert little-endian bytes to 9-digit 528-bit bignum
271// Input x[66] (bytes); output z[9]
272extern void bignum_fromlebytes_p521 (uint64_t z[static 9],uint8_t x[static 66]);
273
274// Compare bignums, x >= y
275// Inputs x[m], y[n]; output function return
276extern uint64_t bignum_ge (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
277
278// Compare bignums, x > y
279// Inputs x[m], y[n]; output function return
280extern uint64_t bignum_gt (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
281
282// Halve modulo p_256, z := (x / 2) mod p_256, assuming x reduced
283// Input x[4]; output z[4]
284extern void bignum_half_p256 (uint64_t z[static 4], uint64_t x[static 4]);
285
286// Halve modulo p_256k1, z := (x / 2) mod p_256k1, assuming x reduced
287// Input x[4]; output z[4]
288extern void bignum_half_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
289
290// Halve modulo p_384, z := (x / 2) mod p_384, assuming x reduced
291// Input x[6]; output z[6]
292extern void bignum_half_p384 (uint64_t z[static 6], uint64_t x[static 6]);
293
294// Halve modulo p_521, z := (x / 2) mod p_521, assuming x reduced
295// Input x[9]; output z[9]
296extern void bignum_half_p521 (uint64_t z[static 9], uint64_t x[static 9]);
297
298// Test bignum for zero-ness, x = 0
299// Input x[k]; output function return
300extern uint64_t bignum_iszero (uint64_t k, uint64_t *x);
301
302// Multiply z := x * y
303// Inputs x[16], y[16]; output z[32]; temporary buffer t[>=32]
304extern void bignum_kmul_16_32 (uint64_t z[static 32], uint64_t x[static 16], uint64_t y[static 16], uint64_t t[static 32]);
305
306// Multiply z := x * y
307// Inputs x[32], y[32]; output z[64]; temporary buffer t[>=96]
308extern void bignum_kmul_32_64 (uint64_t z[static 64], uint64_t x[static 32], uint64_t y[static 32], uint64_t t[static 96]);
309
310// Square, z := x^2
311// Input x[16]; output z[32]; temporary buffer t[>=24]
312extern void bignum_ksqr_16_32 (uint64_t z[static 32], uint64_t x[static 16], uint64_t t[static 24]);
313
314// Square, z := x^2
315// Input x[32]; output z[64]; temporary buffer t[>=72]
316extern void bignum_ksqr_32_64 (uint64_t z[static 64], uint64_t x[static 32], uint64_t t[static 72]);
317
318// Compare bignums, x <= y
319// Inputs x[m], y[n]; output function return
320extern uint64_t bignum_le (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
321
322// Convert 4-digit (256-bit) bignum to/from little-endian form
323// Input x[4]; output z[4]
324extern void bignum_littleendian_4 (uint64_t z[static 4], uint64_t x[static 4]);
325
326// Convert 6-digit (384-bit) bignum to/from little-endian form
327// Input x[6]; output z[6]
328extern void bignum_littleendian_6 (uint64_t z[static 6], uint64_t x[static 6]);
329
330// Compare bignums, x < y
331// Inputs x[m], y[n]; output function return
332extern uint64_t bignum_lt (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
333
334// Multiply-add, z := z + x * y
335// Inputs x[m], y[n]; outputs function return (carry-out) and z[k]
336extern uint64_t bignum_madd (uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
337
338// Reduce modulo group order, z := x mod n_256
339// Input x[k]; output z[4]
340extern void bignum_mod_n256 (uint64_t z[static 4], uint64_t k, uint64_t *x);
341extern void bignum_mod_n256_alt (uint64_t z[static 4], uint64_t k, uint64_t *x);
342
343// Reduce modulo group order, z := x mod n_256
344// Input x[4]; output z[4]
345extern void bignum_mod_n256_4 (uint64_t z[static 4], uint64_t x[static 4]);
346
347// Reduce modulo group order, z := x mod n_256k1
348// Input x[4]; output z[4]
349extern void bignum_mod_n256k1_4 (uint64_t z[static 4], uint64_t x[static 4]);
350
351// Reduce modulo group order, z := x mod n_384
352// Input x[k]; output z[6]
353extern void bignum_mod_n384 (uint64_t z[static 6], uint64_t k, uint64_t *x);
354extern void bignum_mod_n384_alt (uint64_t z[static 6], uint64_t k, uint64_t *x);
355
356// Reduce modulo group order, z := x mod n_384
357// Input x[6]; output z[6]
358extern void bignum_mod_n384_6 (uint64_t z[static 6], uint64_t x[static 6]);
359
360// Reduce modulo group order, z := x mod n_521
361// Input x[9]; output z[9]
362extern void bignum_mod_n521_9 (uint64_t z[static 9], uint64_t x[static 9]);
363extern void bignum_mod_n521_9_alt (uint64_t z[static 9], uint64_t x[static 9]);
364
365// Reduce modulo field characteristic, z := x mod p_25519
366// Input x[4]; output z[4]
367extern void bignum_mod_p25519_4 (uint64_t z[static 4], uint64_t x[static 4]);
368
369// Reduce modulo field characteristic, z := x mod p_256
370// Input x[k]; output z[4]
371extern void bignum_mod_p256 (uint64_t z[static 4], uint64_t k, uint64_t *x);
372extern void bignum_mod_p256_alt (uint64_t z[static 4], uint64_t k, uint64_t *x);
373
374// Reduce modulo field characteristic, z := x mod p_256
375// Input x[4]; output z[4]
376extern void bignum_mod_p256_4 (uint64_t z[static 4], uint64_t x[static 4]);
377
378// Reduce modulo field characteristic, z := x mod p_256k1
379// Input x[4]; output z[4]
380extern void bignum_mod_p256k1_4 (uint64_t z[static 4], uint64_t x[static 4]);
381
382// Reduce modulo field characteristic, z := x mod p_384
383// Input x[k]; output z[6]
384extern void bignum_mod_p384 (uint64_t z[static 6], uint64_t k, uint64_t *x);
385extern void bignum_mod_p384_alt (uint64_t z[static 6], uint64_t k, uint64_t *x);
386
387// Reduce modulo field characteristic, z := x mod p_384
388// Input x[6]; output z[6]
389extern void bignum_mod_p384_6 (uint64_t z[static 6], uint64_t x[static 6]);
390
391// Reduce modulo field characteristic, z := x mod p_521
392// Input x[9]; output z[9]
393extern void bignum_mod_p521_9 (uint64_t z[static 9], uint64_t x[static 9]);
394
395// Add modulo m, z := (x + y) mod m, assuming x and y reduced
396// Inputs x[k], y[k], m[k]; output z[k]
397extern void bignum_modadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m);
398
399// Double modulo m, z := (2 * x) mod m, assuming x reduced
400// Inputs x[k], m[k]; output z[k]
401extern void bignum_moddouble (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m);
402
403// Compute "modification" constant z := 2^{64k} mod m
404// Input m[k]; output z[k]; temporary buffer t[>=k]
405extern void bignum_modifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t);
406
407// Invert modulo b, z := (1/a) mod b, assuming b is an odd number > 1, a coprime to b
408// Inputs a[k], b[k]; output z[k]; temporary buffer t[>=3*k]
409extern void bignum_modinv (uint64_t k, uint64_t *z, uint64_t *a, uint64_t *b, uint64_t *t);
410
411// Optionally negate modulo m, z := (-x) mod m (if p nonzero) or z := x (if p zero), assuming x reduced
412// Inputs p, x[k], m[k]; output z[k]
413extern void bignum_modoptneg (uint64_t k, uint64_t *z, uint64_t p, uint64_t *x, uint64_t *m);
414
415// Subtract modulo m, z := (x - y) mod m, assuming x and y reduced
416// Inputs x[k], y[k], m[k]; output z[k]
417extern void bignum_modsub (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m);
418
419// Compute "montification" constant z := 2^{128k} mod m
420// Input m[k]; output z[k]; temporary buffer t[>=k]
421extern void bignum_montifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t);
422
423// Montgomery multiply, z := (x * y / 2^{64k}) mod m
424// Inputs x[k], y[k], m[k]; output z[k]
425extern void bignum_montmul (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m);
426
427// Montgomery multiply, z := (x * y / 2^256) mod p_256
428// Inputs x[4], y[4]; output z[4]
429extern void bignum_montmul_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
430extern void bignum_montmul_p256_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
431
432// Montgomery multiply, z := (x * y / 2^256) mod p_256k1
433// Inputs x[4], y[4]; output z[4]
434extern void bignum_montmul_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
435extern void bignum_montmul_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
436
437// Montgomery multiply, z := (x * y / 2^384) mod p_384
438// Inputs x[6], y[6]; output z[6]
439extern void bignum_montmul_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]);
440extern void bignum_montmul_p384_alt (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]);
441
442// Montgomery multiply, z := (x * y / 2^576) mod p_521
443// Inputs x[9], y[9]; output z[9]
444extern void bignum_montmul_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]);
445extern void bignum_montmul_p521_alt (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]);
446
447// Montgomery reduce, z := (x' / 2^{64p}) MOD m
448// Inputs x[n], m[k], p; output z[k]
449extern void bignum_montredc (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t *m, uint64_t p);
450
451// Montgomery square, z := (x^2 / 2^{64k}) mod m
452// Inputs x[k], m[k]; output z[k]
453extern void bignum_montsqr (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m);
454
455// Montgomery square, z := (x^2 / 2^256) mod p_256
456// Input x[4]; output z[4]
457extern void bignum_montsqr_p256 (uint64_t z[static 4], uint64_t x[static 4]);
458extern void bignum_montsqr_p256_alt (uint64_t z[static 4], uint64_t x[static 4]);
459
460// Montgomery square, z := (x^2 / 2^256) mod p_256k1
461// Input x[4]; output z[4]
462extern void bignum_montsqr_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
463extern void bignum_montsqr_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]);
464
465// Montgomery square, z := (x^2 / 2^384) mod p_384
466// Input x[6]; output z[6]
467extern void bignum_montsqr_p384 (uint64_t z[static 6], uint64_t x[static 6]);
468extern void bignum_montsqr_p384_alt (uint64_t z[static 6], uint64_t x[static 6]);
469
470// Montgomery square, z := (x^2 / 2^576) mod p_521
471// Input x[9]; output z[9]
472extern void bignum_montsqr_p521 (uint64_t z[static 9], uint64_t x[static 9]);
473extern void bignum_montsqr_p521_alt (uint64_t z[static 9], uint64_t x[static 9]);
474
475// Multiply z := x * y
476// Inputs x[m], y[n]; output z[k]
477extern void bignum_mul (uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
478
479// Multiply z := x * y
480// Inputs x[4], y[4]; output z[8]
481extern void bignum_mul_4_8 (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]);
482extern void bignum_mul_4_8_alt (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]);
483
484// Multiply z := x * y
485// Inputs x[6], y[6]; output z[12]
486extern void bignum_mul_6_12 (uint64_t z[static 12], uint64_t x[static 6], uint64_t y[static 6]);
487extern void bignum_mul_6_12_alt (uint64_t z[static 12], uint64_t x[static 6], uint64_t y[static 6]);
488
489// Multiply z := x * y
490// Inputs x[8], y[8]; output z[16]
491extern void bignum_mul_8_16 (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]);
492extern void bignum_mul_8_16_alt (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]);
493
494// Multiply modulo p_25519, z := (x * y) mod p_25519
495// Inputs x[4], y[4]; output z[4]
496extern void bignum_mul_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
497extern void bignum_mul_p25519_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
498
499// Multiply modulo p_256k1, z := (x * y) mod p_256k1
500// Inputs x[4], y[4]; output z[4]
501extern void bignum_mul_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
502extern void bignum_mul_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
503
504// Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced
505// Inputs x[9], y[9]; output z[9]
506extern void bignum_mul_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]);
507extern void bignum_mul_p521_alt (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]);
508
509// Multiply bignum by 10 and add word: z := 10 * z + d
510// Inputs z[k], d; outputs function return (carry) and z[k]
511extern uint64_t bignum_muladd10 (uint64_t k, uint64_t *z, uint64_t d);
512
513// Multiplex/select z := x (if p nonzero) or z := y (if p zero)
514// Inputs p, x[k], y[k]; output z[k]
515extern void bignum_mux (uint64_t p, uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y);
516
517// 256-bit multiplex/select z := x (if p nonzero) or z := y (if p zero)
518// Inputs p, x[4], y[4]; output z[4]
519extern void bignum_mux_4 (uint64_t p, uint64_t z[static 4],uint64_t x[static 4], uint64_t y[static 4]);
520
521// 384-bit multiplex/select z := x (if p nonzero) or z := y (if p zero)
522// Inputs p, x[6], y[6]; output z[6]
523extern void bignum_mux_6 (uint64_t p, uint64_t z[static 6],uint64_t x[static 6], uint64_t y[static 6]);
524
525// Select element from 16-element table, z := xs[k*i]
526// Inputs xs[16*k], i; output z[k]
527extern void bignum_mux16 (uint64_t k, uint64_t *z, uint64_t *xs, uint64_t i);
528
529// Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced
530// Input x[4]; output z[4]
531extern void bignum_neg_p25519 (uint64_t z[static 4], uint64_t x[static 4]);
532
533// Negate modulo p_256, z := (-x) mod p_256, assuming x reduced
534// Input x[4]; output z[4]
535extern void bignum_neg_p256 (uint64_t z[static 4], uint64_t x[static 4]);
536
537// Negate modulo p_256k1, z := (-x) mod p_256k1, assuming x reduced
538// Input x[4]; output z[4]
539extern void bignum_neg_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
540
541// Negate modulo p_384, z := (-x) mod p_384, assuming x reduced
542// Input x[6]; output z[6]
543extern void bignum_neg_p384 (uint64_t z[static 6], uint64_t x[static 6]);
544
545// Negate modulo p_521, z := (-x) mod p_521, assuming x reduced
546// Input x[9]; output z[9]
547extern void bignum_neg_p521 (uint64_t z[static 9], uint64_t x[static 9]);
548
549// Negated modular inverse, z := (-1/x) mod 2^{64k}
550// Input x[k]; output z[k]
551extern void bignum_negmodinv (uint64_t k, uint64_t *z, uint64_t *x);
552
553// Test bignum for nonzero-ness x =/= 0
554// Input x[k]; output function return
555extern uint64_t bignum_nonzero (uint64_t k, uint64_t *x);
556
557// Test 256-bit bignum for nonzero-ness x =/= 0
558// Input x[4]; output function return
559extern uint64_t bignum_nonzero_4(uint64_t x[static 4]);
560
561// Test 384-bit bignum for nonzero-ness x =/= 0
562// Input x[6]; output function return
563extern uint64_t bignum_nonzero_6(uint64_t x[static 6]);
564
565// Normalize bignum in-place by shifting left till top bit is 1
566// Input z[k]; outputs function return (bits shifted left) and z[k]
567extern uint64_t bignum_normalize (uint64_t k, uint64_t *z);
568
569// Test bignum for odd-ness
570// Input x[k]; output function return
571extern uint64_t bignum_odd (uint64_t k, uint64_t *x);
572
573// Convert single digit to bignum, z := n
574// Input n; output z[k]
575extern void bignum_of_word (uint64_t k, uint64_t *z, uint64_t n);
576
577// Optionally add, z := x + y (if p nonzero) or z := x (if p zero)
578// Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k]
579extern uint64_t bignum_optadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y);
580
581// Optionally negate, z := -x (if p nonzero) or z := x (if p zero)
582// Inputs p, x[k]; outputs function return (nonzero input) and z[k]
583extern uint64_t bignum_optneg (uint64_t k, uint64_t *z, uint64_t p, uint64_t *x);
584
585// Optionally negate modulo p_25519, z := (-x) mod p_25519 (if p nonzero) or z := x (if p zero), assuming x reduced
586// Inputs p, x[4]; output z[4]
587extern void bignum_optneg_p25519 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]);
588
589// Optionally negate modulo p_256, z := (-x) mod p_256 (if p nonzero) or z := x (if p zero), assuming x reduced
590// Inputs p, x[4]; output z[4]
591extern void bignum_optneg_p256 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]);
592
593// Optionally negate modulo p_256k1, z := (-x) mod p_256k1 (if p nonzero) or z := x (if p zero), assuming x reduced
594// Inputs p, x[4]; output z[4]
595extern void bignum_optneg_p256k1 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]);
596
597// Optionally negate modulo p_384, z := (-x) mod p_384 (if p nonzero) or z := x (if p zero), assuming x reduced
598// Inputs p, x[6]; output z[6]
599extern void bignum_optneg_p384 (uint64_t z[static 6], uint64_t p, uint64_t x[static 6]);
600
601// Optionally negate modulo p_521, z := (-x) mod p_521 (if p nonzero) or z := x (if p zero), assuming x reduced
602// Inputs p, x[9]; output z[9]
603extern void bignum_optneg_p521 (uint64_t z[static 9], uint64_t p, uint64_t x[static 9]);
604
605// Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero)
606// Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k]
607extern uint64_t bignum_optsub (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y);
608
609// Optionally subtract or add, z := x + sgn(p) * y interpreting p as signed
610// Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k]
611extern uint64_t bignum_optsubadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y);
612
613// Return bignum of power of 2, z := 2^n
614// Input n; output z[k]
615extern void bignum_pow2 (uint64_t k, uint64_t *z, uint64_t n);
616
617// Shift bignum left by c < 64 bits z := x * 2^c
618// Inputs x[n], c; outputs function return (carry-out) and z[k]
619extern uint64_t bignum_shl_small (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t c);
620
621// Shift bignum right by c < 64 bits z := floor(x / 2^c)
622// Inputs x[n], c; outputs function return (bits shifted out) and z[k]
623extern uint64_t bignum_shr_small (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t c);
624
625// Square, z := x^2
626// Input x[n]; output z[k]
627extern void bignum_sqr (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x);
628
629// Square, z := x^2
630// Input x[4]; output z[8]
631extern void bignum_sqr_4_8 (uint64_t z[static 8], uint64_t x[static 4]);
632extern void bignum_sqr_4_8_alt (uint64_t z[static 8], uint64_t x[static 4]);
633
634// Square, z := x^2
635// Input x[6]; output z[12]
636extern void bignum_sqr_6_12 (uint64_t z[static 12], uint64_t x[static 6]);
637extern void bignum_sqr_6_12_alt (uint64_t z[static 12], uint64_t x[static 6]);
638
639// Square, z := x^2
640// Input x[8]; output z[16]
641extern void bignum_sqr_8_16 (uint64_t z[static 16], uint64_t x[static 8]);
642extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]);
643
644// Square modulo p_25519, z := (x^2) mod p_25519
645// Input x[4]; output z[4]
646extern void bignum_sqr_p25519 (uint64_t z[static 4], uint64_t x[static 4]);
647extern void bignum_sqr_p25519_alt (uint64_t z[static 4], uint64_t x[static 4]);
648
649// Square modulo p_256k1, z := (x^2) mod p_256k1
650// Input x[4]; output z[4]
651extern void bignum_sqr_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
652extern void bignum_sqr_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]);
653
654// Square modulo p_521, z := (x^2) mod p_521, assuming x reduced
655// Input x[9]; output z[9]
656extern void bignum_sqr_p521 (uint64_t z[static 9], uint64_t x[static 9]);
657extern void bignum_sqr_p521_alt (uint64_t z[static 9], uint64_t x[static 9]);
658
659// Subtract, z := x - y
660// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
661extern uint64_t bignum_sub (uint64_t p, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
662
663// Subtract modulo p_25519, z := (x - y) mod p_25519, assuming x and y reduced
664// Inputs x[4], y[4]; output z[4]
665extern void bignum_sub_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
666
667// Subtract modulo p_256, z := (x - y) mod p_256, assuming x and y reduced
668// Inputs x[4], y[4]; output z[4]
669extern void bignum_sub_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
670
671// Subtract modulo p_256k1, z := (x - y) mod p_256k1, assuming x and y reduced
672// Inputs x[4], y[4]; output z[4]
673extern void bignum_sub_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]);
674
675// Subtract modulo p_384, z := (x - y) mod p_384, assuming x and y reduced
676// Inputs x[6], y[6]; output z[6]
677extern void bignum_sub_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]);
678
679// Subtract modulo p_521, z := (x - y) mod p_521, assuming x and y reduced
680// Inputs x[9], y[9]; output z[9]
681extern void bignum_sub_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]);
682
683// Convert 4-digit (256-bit) bignum to big-endian bytes
684// Input x[4]; output z[32] (bytes)
685extern void bignum_tobebytes_4 (uint8_t z[static 32], uint64_t x[static 4]);
686
687// Convert 6-digit (384-bit) bignum to big-endian bytes
688// Input x[6]; output z[48] (bytes)
689extern void bignum_tobebytes_6 (uint8_t z[static 48], uint64_t x[static 6]);
690
691// Convert 4-digit (256-bit) bignum to little-endian bytes
692// Input x[4]; output z[32] (bytes)
693extern void bignum_tolebytes_4 (uint8_t z[static 32], uint64_t x[static 4]);
694
695// Convert 6-digit (384-bit) bignum to little-endian bytes
696// Input x[6]; output z[48] (bytes)
697extern void bignum_tolebytes_6 (uint8_t z[static 48], uint64_t x[static 6]);
698
699// Convert 9-digit 528-bit bignum to little-endian bytes
700// Input x[9]; output z[66] (bytes)
701extern void bignum_tolebytes_p521 (uint8_t z[static 66], uint64_t x[static 9]);
702
703// Convert to Montgomery form z := (2^256 * x) mod p_256
704// Input x[4]; output z[4]
705extern void bignum_tomont_p256 (uint64_t z[static 4], uint64_t x[static 4]);
706extern void bignum_tomont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]);
707
708// Convert to Montgomery form z := (2^256 * x) mod p_256k1
709// Input x[4]; output z[4]
710extern void bignum_tomont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
711extern void bignum_tomont_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]);
712
713// Convert to Montgomery form z := (2^384 * x) mod p_384
714// Input x[6]; output z[6]
715extern void bignum_tomont_p384 (uint64_t z[static 6], uint64_t x[static 6]);
716extern void bignum_tomont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]);
717
718// Convert to Montgomery form z := (2^576 * x) mod p_521
719// Input x[9]; output z[9]
720extern void bignum_tomont_p521 (uint64_t z[static 9], uint64_t x[static 9]);
721
722// Triple modulo p_256, z := (3 * x) mod p_256
723// Input x[4]; output z[4]
724extern void bignum_triple_p256 (uint64_t z[static 4], uint64_t x[static 4]);
725extern void bignum_triple_p256_alt (uint64_t z[static 4], uint64_t x[static 4]);
726
727// Triple modulo p_256k1, z := (3 * x) mod p_256k1
728// Input x[4]; output z[4]
729extern void bignum_triple_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
730extern void bignum_triple_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]);
731
732// Triple modulo p_384, z := (3 * x) mod p_384
733// Input x[6]; output z[6]
734extern void bignum_triple_p384 (uint64_t z[static 6], uint64_t x[static 6]);
735extern void bignum_triple_p384_alt (uint64_t z[static 6], uint64_t x[static 6]);
736
737// Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced
738// Input x[9]; output z[9]
739extern void bignum_triple_p521 (uint64_t z[static 9], uint64_t x[static 9]);
740extern void bignum_triple_p521_alt (uint64_t z[static 9], uint64_t x[static 9]);
741
742// Montgomery ladder step for curve25519
743// Inputs point[8], pp[16], b; output rr[16]
744extern void curve25519_ladderstep(uint64_t rr[16],uint64_t point[8],uint64_t pp[16],uint64_t b);
745extern void curve25519_ladderstep_alt(uint64_t rr[16],uint64_t point[8],uint64_t pp[16],uint64_t b);
746
747// Projective scalar multiplication, x coordinate only, for curve25519
748// Inputs scalar[4], point[4]; output res[8]
749extern void curve25519_pxscalarmul(uint64_t res[static 8],uint64_t scalar[static 4],uint64_t point[static 4]);
750extern void curve25519_pxscalarmul_alt(uint64_t res[static 8],uint64_t scalar[static 4],uint64_t point[static 4]);
751
752// x25519 function for curve25519
753// Inputs scalar[4], point[4]; output res[4]
754extern void curve25519_x25519(uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]);
755extern void curve25519_x25519_alt(uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]);
756
757// x25519 function for curve25519 on base element 9
758// Input scalar[4]; output res[4]
759extern void curve25519_x25519base(uint64_t res[static 4],uint64_t scalar[static 4]);
760extern void curve25519_x25519base_alt(uint64_t res[static 4],uint64_t scalar[static 4]);
761
762// Extended projective addition for edwards25519
763// Inputs p1[16], p2[16]; output p3[16]
764extern void edwards25519_epadd(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 16]);
765extern void edwards25519_epadd_alt(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 16]);
766
767// Extended projective doubling for edwards25519
768// Input p1[12]; output p3[16]
769extern void edwards25519_epdouble(uint64_t p3[static 16],uint64_t p1[static 12]);
770extern void edwards25519_epdouble_alt(uint64_t p3[static 16],uint64_t p1[static 12]);
771
772// Projective doubling for edwards25519
773// Input p1[12]; output p3[12]
774extern void edwards25519_pdouble(uint64_t p3[static 12],uint64_t p1[static 12]);
775extern void edwards25519_pdouble_alt(uint64_t p3[static 12],uint64_t p1[static 12]);
776
777// Extended projective + precomputed mixed addition for edwards25519
778// Inputs p1[16], p2[12]; output p3[16]
779extern void edwards25519_pepadd(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 12]);
780extern void edwards25519_pepadd_alt(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 12]);
781
782// Point addition on NIST curve P-256 in Montgomery-Jacobian coordinates
783// Inputs p1[12], p2[12]; output p3[12]
784extern void p256_montjadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 12]);
785
786// Point doubling on NIST curve P-256 in Montgomery-Jacobian coordinates
787// Input p1[12]; output p3[12]
788extern void p256_montjdouble(uint64_t p3[static 12],uint64_t p1[static 12]);
789
790// Point mixed addition on NIST curve P-256 in Montgomery-Jacobian coordinates
791// Inputs p1[12], p2[8]; output p3[12]
792extern void p256_montjmixadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 8]);
793
794// Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates
795// Inputs p1[18], p2[18]; output p3[18]
796extern void p384_montjadd(uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 18]);
797
798// Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates
799// Input p1[18]; output p3[18]
800extern void p384_montjdouble(uint64_t p3[static 18],uint64_t p1[static 18]);
801
802// Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates
803// Inputs p1[18], p2[12]; output p3[18]
804extern void p384_montjmixadd(uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 12]);
805
806// Point addition on NIST curve P-521 in Jacobian coordinates
807// Inputs p1[27], p2[27]; output p3[27]
808extern void p521_jadd(uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 27]);
809
810// Point doubling on NIST curve P-521 in Jacobian coordinates
811// Input p1[27]; output p3[27]
812extern void p521_jdouble(uint64_t p3[static 27],uint64_t p1[static 27]);
813
814// Point mixed addition on NIST curve P-521 in Jacobian coordinates
815// Inputs p1[27], p2[18]; output p3[27]
816extern void p521_jmixadd(uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 18]);
817
818// Point addition on SECG curve secp256k1 in Jacobian coordinates
819// Inputs p1[12], p2[12]; output p3[12]
820extern void secp256k1_jadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 12]);
821
822// Point doubling on SECG curve secp256k1 in Jacobian coordinates
823// Input p1[12]; output p3[12]
824extern void secp256k1_jdouble(uint64_t p3[static 12],uint64_t p1[static 12]);
825
826// Point mixed addition on SECG curve secp256k1 in Jacobian coordinates
827// Inputs p1[12], p2[8]; output p3[12]
828extern void secp256k1_jmixadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 8]);
829
830// Reverse the bytes in a single word
831// Input a; output function return
832extern uint64_t word_bytereverse (uint64_t a);
833
834// Count leading zero bits in a single word
835// Input a; output function return
836extern uint64_t word_clz (uint64_t a);
837
838// Count trailing zero bits in a single word
839// Input a; output function return
840extern uint64_t word_ctz (uint64_t a);
841
842// Return maximum of two unsigned 64-bit words
843// Inputs a, b; output function return
844extern uint64_t word_max (uint64_t a, uint64_t b);
845
846// Return minimum of two unsigned 64-bit words
847// Inputs a, b; output function return
848extern uint64_t word_min (uint64_t a, uint64_t b);
849
850// Single-word negated modular inverse (-1/a) mod 2^64
851// Input a; output function return
852extern uint64_t word_negmodinv (uint64_t a);
853
854// Single-word reciprocal, 2^64 + ret = ceil(2^128/a) - 1 if MSB of "a" is set
855// Input a; output function return
856extern uint64_t word_recip (uint64_t a);
diff --git a/src/lib/libcrypto/bn/s2n_bignum_internal.h b/src/lib/libcrypto/bn/s2n_bignum_internal.h
deleted file mode 100644
index b82db7d019..0000000000
--- a/src/lib/libcrypto/bn/s2n_bignum_internal.h
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
// S2N_BN_SYMBOL(name): spelling of a symbol name as used by the directives
// defined later in this header. When __APPLE__ is defined the name is
// token-pasted onto a leading underscore (foo -> _foo); otherwise it is
// passed through unchanged.
// NOTE(review): presumably this matches the platform's C symbol-naming
// convention for the assembly sources -- confirm against the .S files.
15#ifdef __APPLE__
16# define S2N_BN_SYMBOL(NAME) _##NAME
17#else
18# define S2N_BN_SYMBOL(name) name
19#endif
20
// When __CET__ is defined, include <cet.h>; otherwise define _CET_ENDBR as an
// empty macro so that any use of it expands to nothing.
// NOTE(review): presumably <cet.h> is what supplies the real _CET_ENDBR in
// the __CET__ case -- confirm with the toolchain headers.
21#ifdef __CET__
22# include <cet.h>
23#else
24# define _CET_ENDBR
25#endif
26
// S2N_BN_SYM_VISIBILITY_DIRECTIVE(name): expands to a `.globl` directive for
// the symbol, with the name first passed through S2N_BN_SYMBOL.
//
// S2N_BN_SYM_PRIVACY_DIRECTIVE(name): only produces a directive when
// S2N_BN_HIDE_SYMBOLS is defined, in which case it expands to
// `.private_extern` (if __APPLE__ is defined) or `.hidden` (otherwise) for
// the same S2N_BN_SYMBOL-mapped name. Without S2N_BN_HIDE_SYMBOLS the macro
// body is only a comment, so it expands to nothing.
27#define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl S2N_BN_SYMBOL(name)
28#ifdef S2N_BN_HIDE_SYMBOLS
29# ifdef __APPLE__
30# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .private_extern S2N_BN_SYMBOL(name)
31# else
32# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .hidden S2N_BN_SYMBOL(name)
33# endif
34#else
35# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */
36#endif