summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_add.S51
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S32
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S28
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_modadd.S112
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_modsub.S99
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul.S25
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8.S187
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S8
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12.S223
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12_alt.S199
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16.S273
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S9
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S29
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8.S158
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S8
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12.S227
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12_alt.S210
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16.S311
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S7
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sub.S47
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bn_arch.c141
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bn_arch.h12
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/word_clz.S6
-rw-r--r--src/lib/libcrypto/bn/arch/i386/bn_arch.h7
-rw-r--r--src/lib/libcrypto/bn/arch/mips64/bn_arch.h7
-rw-r--r--src/lib/libcrypto/bn/arch/powerpc/bn_arch.h7
-rw-r--r--src/lib/libcrypto/bn/asm/bn-586.pl15
-rw-r--r--src/lib/libcrypto/bn/asm/mips.pl96
-rw-r--r--src/lib/libcrypto/bn/asm/ppc.pl32
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/x86-mont.pl3
-rw-r--r--src/lib/libcrypto/bn/bn.h54
-rw-r--r--src/lib/libcrypto/bn/bn_add.c76
-rw-r--r--src/lib/libcrypto/bn/bn_add_sub.c178
-rw-r--r--src/lib/libcrypto/bn/bn_const.c294
-rw-r--r--src/lib/libcrypto/bn/bn_convert.c12
-rw-r--r--src/lib/libcrypto/bn/bn_ctx.c4
-rw-r--r--src/lib/libcrypto/bn/bn_div.c17
-rw-r--r--src/lib/libcrypto/bn/bn_exp.c5
-rw-r--r--src/lib/libcrypto/bn/bn_gcd.c11
-rw-r--r--src/lib/libcrypto/bn/bn_internal.h27
-rw-r--r--src/lib/libcrypto/bn/bn_isqrt.c4
-rw-r--r--src/lib/libcrypto/bn/bn_lib.c7
-rw-r--r--src/lib/libcrypto/bn/bn_local.h41
-rw-r--r--src/lib/libcrypto/bn/bn_mod.c5
-rw-r--r--src/lib/libcrypto/bn/bn_mod_sqrt.c5
-rw-r--r--src/lib/libcrypto/bn/bn_mod_words.c110
-rw-r--r--src/lib/libcrypto/bn/bn_mont.c78
-rw-r--r--src/lib/libcrypto/bn/bn_mul.c141
-rw-r--r--src/lib/libcrypto/bn/bn_prime.c10
-rw-r--r--src/lib/libcrypto/bn/bn_rand.c5
-rw-r--r--src/lib/libcrypto/bn/bn_recp.c5
-rw-r--r--src/lib/libcrypto/bn/bn_shift.c4
-rw-r--r--src/lib/libcrypto/bn/bn_sqr.c76
-rw-r--r--src/lib/libcrypto/bn/bn_word.c4
-rw-r--r--src/lib/libcrypto/bn/s2n_bignum.h793
-rw-r--r--src/lib/libcrypto/bn/s2n_bignum_internal.h35
56 files changed, 3827 insertions, 743 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
index 5fe4aae7a1..1d4e6d08ef 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
@@ -1,3 +1,5 @@
1// $OpenBSD: bignum_add.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,9 +18,8 @@
16// Add, z := x + y 18// Add, z := x + y
17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p] 19// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18// 20//
19// extern uint64_t bignum_add 21// extern uint64_t bignum_add(uint64_t p, uint64_t *z, uint64_t m,
20// (uint64_t p, uint64_t *z, 22// const uint64_t *x, uint64_t n, const uint64_t *y);
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22// 23//
23// Does the z := x + y operation, truncating modulo p words in general and 24// Does the z := x + y operation, truncating modulo p words in general and
24// returning a top carry (0 or 1) in the p'th place, only adding the input 25// returning a top carry (0 or 1) in the p'th place, only adding the input
@@ -49,7 +50,7 @@
49 50
50 51
51S2N_BN_SYMBOL(bignum_add): 52S2N_BN_SYMBOL(bignum_add):
52 _CET_ENDBR 53 _CET_ENDBR
53 54
54#if WINDOWS_ABI 55#if WINDOWS_ABI
55 push rdi 56 push rdi
@@ -75,7 +76,7 @@ S2N_BN_SYMBOL(bignum_add):
75 cmp p, n 76 cmp p, n
76 cmovc n, p 77 cmovc n, p
77 cmp m, n 78 cmp m, n
78 jc ylonger 79 jc bignum_add_ylonger
79 80
80// The case where x is longer or of the same size (p >= m >= n) 81// The case where x is longer or of the same size (p >= m >= n)
81 82
@@ -83,27 +84,27 @@ S2N_BN_SYMBOL(bignum_add):
83 sub m, n 84 sub m, n
84 inc m 85 inc m
85 test n, n 86 test n, n
86 jz xtest 87 jz bignum_add_xtest
87xmainloop: 88bignum_add_xmainloop:
88 mov a, [x+8*i] 89 mov a, [x+8*i]
89 adc a, [y+8*i] 90 adc a, [y+8*i]
90 mov [z+8*i],a 91 mov [z+8*i],a
91 inc i 92 inc i
92 dec n 93 dec n
93 jnz xmainloop 94 jnz bignum_add_xmainloop
94 jmp xtest 95 jmp bignum_add_xtest
95xtoploop: 96bignum_add_xtoploop:
96 mov a, [x+8*i] 97 mov a, [x+8*i]
97 adc a, 0 98 adc a, 0
98 mov [z+8*i],a 99 mov [z+8*i],a
99 inc i 100 inc i
100xtest: 101bignum_add_xtest:
101 dec m 102 dec m
102 jnz xtoploop 103 jnz bignum_add_xtoploop
103 mov ashort, 0 104 mov ashort, 0
104 adc a, 0 105 adc a, 0
105 test p, p 106 test p, p
106 jnz tails 107 jnz bignum_add_tails
107#if WINDOWS_ABI 108#if WINDOWS_ABI
108 pop rsi 109 pop rsi
109 pop rdi 110 pop rdi
@@ -112,30 +113,30 @@ xtest:
112 113
113// The case where y is longer (p >= n > m) 114// The case where y is longer (p >= n > m)
114 115
115ylonger: 116bignum_add_ylonger:
116 117
117 sub p, n 118 sub p, n
118 sub n, m 119 sub n, m
119 test m, m 120 test m, m
120 jz ytoploop 121 jz bignum_add_ytoploop
121ymainloop: 122bignum_add_ymainloop:
122 mov a, [x+8*i] 123 mov a, [x+8*i]
123 adc a, [y+8*i] 124 adc a, [y+8*i]
124 mov [z+8*i],a 125 mov [z+8*i],a
125 inc i 126 inc i
126 dec m 127 dec m
127 jnz ymainloop 128 jnz bignum_add_ymainloop
128ytoploop: 129bignum_add_ytoploop:
129 mov a, [y+8*i] 130 mov a, [y+8*i]
130 adc a, 0 131 adc a, 0
131 mov [z+8*i],a 132 mov [z+8*i],a
132 inc i 133 inc i
133 dec n 134 dec n
134 jnz ytoploop 135 jnz bignum_add_ytoploop
135 mov ashort, 0 136 mov ashort, 0
136 adc a, 0 137 adc a, 0
137 test p, p 138 test p, p
138 jnz tails 139 jnz bignum_add_tails
139#if WINDOWS_ABI 140#if WINDOWS_ABI
140 pop rsi 141 pop rsi
141 pop rdi 142 pop rdi
@@ -144,16 +145,16 @@ ytoploop:
144 145
145// Adding a non-trivial tail, when p > max(m,n) 146// Adding a non-trivial tail, when p > max(m,n)
146 147
147tails: 148bignum_add_tails:
148 mov [z+8*i],a 149 mov [z+8*i],a
149 xor a, a 150 xor a, a
150 jmp tail 151 jmp bignum_add_tail
151tailloop: 152bignum_add_tailloop:
152 mov [z+8*i],a 153 mov [z+8*i],a
153tail: 154bignum_add_tail:
154 inc i 155 inc i
155 dec p 156 dec p
156 jnz tailloop 157 jnz bignum_add_tailloop
157#if WINDOWS_ABI 158#if WINDOWS_ABI
158 pop rsi 159 pop rsi
159 pop rdi 160 pop rdi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
index 25ba17bce2..a611919603 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
@@ -1,3 +1,5 @@
1// $OpenBSD: bignum_cmadd.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,8 +18,8 @@
16// Multiply-add with single-word multiplier, z := z + c * y 18// Multiply-add with single-word multiplier, z := z + c * y
17// Inputs c, y[n]; outputs function return (carry-out) and z[k] 19// Inputs c, y[n]; outputs function return (carry-out) and z[k]
18// 20//
19// extern uint64_t bignum_cmadd 21// extern uint64_t bignum_cmadd(uint64_t k, uint64_t *z, uint64_t c, uint64_t n,
20// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); 22// const uint64_t *y);
21// 23//
22// Does the "z := z + c * y" operation where y is n digits, result z is p. 24// Does the "z := z + c * y" operation where y is n digits, result z is p.
23// Truncates the result in general. 25// Truncates the result in general.
@@ -54,7 +56,7 @@
54 56
55 57
56S2N_BN_SYMBOL(bignum_cmadd): 58S2N_BN_SYMBOL(bignum_cmadd):
57 _CET_ENDBR 59 _CET_ENDBR
58 60
59#if WINDOWS_ABI 61#if WINDOWS_ABI
60 push rdi 62 push rdi
@@ -82,7 +84,7 @@ S2N_BN_SYMBOL(bignum_cmadd):
82 84
83 xor h, h 85 xor h, h
84 test n, n 86 test n, n
85 jz end 87 jz bignum_cmadd_end
86 88
87// Move c into a safer register as multiplies overwrite rdx 89// Move c into a safer register as multiplies overwrite rdx
88 90
@@ -96,11 +98,11 @@ S2N_BN_SYMBOL(bignum_cmadd):
96 mov h, rdx 98 mov h, rdx
97 mov ishort, 1 99 mov ishort, 1
98 dec n 100 dec n
99 jz hightail 101 jz bignum_cmadd_hightail
100 102
101// Main loop, where we always have CF + previous high part h to add in 103// Main loop, where we always have CF + previous high part h to add in
102 104
103loop: 105bignum_cmadd_loop:
104 adc h, [z+8*i] 106 adc h, [z+8*i]
105 sbb r, r 107 sbb r, r
106 mov rax, [x+8*i] 108 mov rax, [x+8*i]
@@ -111,36 +113,36 @@ loop:
111 mov h, rdx 113 mov h, rdx
112 inc i 114 inc i
113 dec n 115 dec n
114 jnz loop 116 jnz bignum_cmadd_loop
115 117
116hightail: 118bignum_cmadd_hightail:
117 adc h, 0 119 adc h, 0
118 120
119// Propagate the carry all the way to the end with h as extra carry word 121// Propagate the carry all the way to the end with h as extra carry word
120 122
121tail: 123bignum_cmadd_tail:
122 test p, p 124 test p, p
123 jz end 125 jz bignum_cmadd_end
124 126
125 add [z+8*i], h 127 add [z+8*i], h
126 mov hshort, 0 128 mov hshort, 0
127 inc i 129 inc i
128 dec p 130 dec p
129 jz highend 131 jz bignum_cmadd_highend
130 132
131tloop: 133bignum_cmadd_tloop:
132 adc [z+8*i], h 134 adc [z+8*i], h
133 inc i 135 inc i
134 dec p 136 dec p
135 jnz tloop 137 jnz bignum_cmadd_tloop
136 138
137highend: 139bignum_cmadd_highend:
138 140
139 adc h, 0 141 adc h, 0
140 142
141// Return the high/carry word 143// Return the high/carry word
142 144
143end: 145bignum_cmadd_end:
144 mov rax, h 146 mov rax, h
145 147
146 pop rbx 148 pop rbx
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
index 12f785d63a..eb71d9da44 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
@@ -1,3 +1,5 @@
1// $OpenBSD: bignum_cmul.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,8 +18,8 @@
16// Multiply by a single word, z := c * y 18// Multiply by a single word, z := c * y
17// Inputs c, y[n]; outputs function return (carry-out) and z[k] 19// Inputs c, y[n]; outputs function return (carry-out) and z[k]
18// 20//
19// extern uint64_t bignum_cmul 21// extern uint64_t bignum_cmul(uint64_t k, uint64_t *z, uint64_t c, uint64_t n,
20// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); 22// const uint64_t *y);
21// 23//
22// Does the "z := c * y" operation where y is n digits, result z is p. 24// Does the "z := c * y" operation where y is n digits, result z is p.
23// Truncates the result in general unless p >= n + 1. 25// Truncates the result in general unless p >= n + 1.
@@ -51,7 +53,7 @@
51 53
52 54
53S2N_BN_SYMBOL(bignum_cmul): 55S2N_BN_SYMBOL(bignum_cmul):
54 _CET_ENDBR 56 _CET_ENDBR
55 57
56#if WINDOWS_ABI 58#if WINDOWS_ABI
57 push rdi 59 push rdi
@@ -76,7 +78,7 @@ S2N_BN_SYMBOL(bignum_cmul):
76 xor h, h 78 xor h, h
77 xor i, i 79 xor i, i
78 test n, n 80 test n, n
79 jz tail 81 jz bignum_cmul_tail
80 82
81// Move c into a safer register as multiplies overwrite rdx 83// Move c into a safer register as multiplies overwrite rdx
82 84
@@ -90,11 +92,11 @@ S2N_BN_SYMBOL(bignum_cmul):
90 mov h, rdx 92 mov h, rdx
91 inc i 93 inc i
92 cmp i, n 94 cmp i, n
93 jz tail 95 jz bignum_cmul_tail
94 96
95// Main loop doing the multiplications 97// Main loop doing the multiplications
96 98
97loop: 99bignum_cmul_loop:
98 mov rax, [x+8*i] 100 mov rax, [x+8*i]
99 mul c 101 mul c
100 add rax, h 102 add rax, h
@@ -103,28 +105,28 @@ loop:
103 mov h, rdx 105 mov h, rdx
104 inc i 106 inc i
105 cmp i, n 107 cmp i, n
106 jc loop 108 jc bignum_cmul_loop
107 109
108// Add a tail when the destination is longer 110// Add a tail when the destination is longer
109 111
110tail: 112bignum_cmul_tail:
111 cmp i, p 113 cmp i, p
112 jnc end 114 jnc bignum_cmul_end
113 mov [z+8*i], h 115 mov [z+8*i], h
114 xor h, h 116 xor h, h
115 inc i 117 inc i
116 cmp i, p 118 cmp i, p
117 jnc end 119 jnc bignum_cmul_end
118 120
119tloop: 121bignum_cmul_tloop:
120 mov [z+8*i], h 122 mov [z+8*i], h
121 inc i 123 inc i
122 cmp i, p 124 cmp i, p
123 jc tloop 125 jc bignum_cmul_tloop
124 126
125// Return the high/carry word 127// Return the high/carry word
126 128
127end: 129bignum_cmul_end:
128 mov rax, h 130 mov rax, h
129 131
130#if WINDOWS_ABI 132#if WINDOWS_ABI
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_modadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_modadd.S
new file mode 100644
index 0000000000..baf27fdc7f
--- /dev/null
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_modadd.S
@@ -0,0 +1,112 @@
1// $OpenBSD: bignum_modadd.S,v 1.4 2025/08/12 10:23:40 jsing Exp $
2//
3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4//
5// Permission to use, copy, modify, and/or distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17// ----------------------------------------------------------------------------
18// Add modulo m, z := (x + y) mod m, assuming x and y reduced
19// Inputs x[k], y[k], m[k]; output z[k]
20//
21// extern void bignum_modadd(uint64_t k, uint64_t *z, const uint64_t *x,
22// const uint64_t *y, const uint64_t *m);
23//
24// Standard x86-64 ABI: RDI = k, RSI = z, RDX = x, RCX = y, R8 = m
25// Microsoft x64 ABI: RCX = k, RDX = z, R8 = x, R9 = y, [RSP+40] = m
26// ----------------------------------------------------------------------------
27
28#include "s2n_bignum_internal.h"
29
30 .intel_syntax noprefix
31 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_modadd)
32 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_modadd)
33 .text
34
35#define k rdi // number of 64-bit words in x, y, m and z
36#define z rsi // output pointer
37#define x rdx // first input pointer
38#define y rcx // second input pointer
39#define m r8 // modulus pointer
40#define i r9 // word index into the arrays
41#define j r10 // down-counter in the first two loops, saved borrow in the last
42#define a rax // scratch word
43#define c r11 // carry word from the addition, later turned into the select mask
44
45S2N_BN_SYMBOL(bignum_modadd):
46 _CET_ENDBR
47// Under WINDOWS_ABI, save rdi/rsi and move the Microsoft x64 arguments into the registers used below
48#if WINDOWS_ABI
49 push rdi
50 push rsi
51 mov rdi, rcx
52 mov rsi, rdx
53 mov rdx, r8
54 mov rcx, r9
55 mov r8, [rsp+56] // m is the stack argument: [rsp+40] at entry, plus 16 bytes for the two pushes above
56#endif
57
58// If k = 0 there are no words to process, so return without touching z
59
60 test k, k
61 jz bignum_modadd_end
62
63// First just add (c::z) := x + y
64// xor clears CF and inc/dec leave CF alone, so the adc chain carries from one iteration to the next
65 xor c, c // c = 0, CF = 0
66 mov j, k // j counts down the k words
67 xor i, i // i = 0, first word
68bignum_modadd_addloop:
69 mov a, [x+8*i]
70 adc a, [y+8*i]
71 mov [z+8*i], a
72 inc i
73 dec j
74 jnz bignum_modadd_addloop
75 adc c, 0 // c = carry out of the top word
76
77// Now do a comparison subtraction (c::z) - m, recording mask for (c::z) >= m
78// Only the borrow matters here: each difference lands in a and is discarded, z is not modified
79 mov j, k
80 xor i, i // restart at word 0; this also clears CF for the sbb chain
81bignum_modadd_cmploop:
82 mov a, [z+8*i]
83 sbb a, [m+8*i]
84 inc i
85 dec j
86 jnz bignum_modadd_cmploop
87 sbb c, 0 // fold the final borrow into the carry word
88 not c // with x, y < m as the header assumes: all-ones when (c::z) >= m, zero otherwise
89
90// Now do a masked subtraction z := z - [c] * m
91// j is 0 on entry (the loop above counted it down), so the first neg j starts with no borrow
92 xor i, i
93bignum_modadd_subloop:
94 mov a, [m+8*i]
95 and a, c // a = m[i] when the mask is set, else 0
96 neg j // CF = (j != 0): reload the borrow saved by the previous iteration
97 sbb [z+8*i], a
98 sbb j, j // j = -CF: stash the borrow, because the cmp below overwrites CF
99 inc i
100 cmp i, k
101 jc bignum_modadd_subloop // continue while i < k (unsigned)
102
103bignum_modadd_end:
104#if WINDOWS_ABI
105 pop rsi
106 pop rdi
107#endif
108 ret
109
110#if defined(__linux__) && defined(__ELF__)
111.section .note.GNU-stack,"",%progbits
112#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_modsub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_modsub.S
new file mode 100644
index 0000000000..63b3230e35
--- /dev/null
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_modsub.S
@@ -0,0 +1,99 @@
1// $OpenBSD: bignum_modsub.S,v 1.4 2025/08/12 10:23:40 jsing Exp $
2//
3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4//
5// Permission to use, copy, modify, and/or distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17// ----------------------------------------------------------------------------
18// Subtract modulo m, z := (x - y) mod m, assuming x and y reduced
19// Inputs x[k], y[k], m[k]; output z[k]
20//
21// extern void bignum_modsub(uint64_t k, uint64_t *z, const uint64_t *x,
22// const uint64_t *y, const uint64_t *m);
23//
24// Standard x86-64 ABI: RDI = k, RSI = z, RDX = x, RCX = y, R8 = m
25// Microsoft x64 ABI: RCX = k, RDX = z, R8 = x, R9 = y, [RSP+40] = m
26// ----------------------------------------------------------------------------
27
28#include "s2n_bignum_internal.h"
29
30 .intel_syntax noprefix
31 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_modsub)
32 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_modsub)
33 .text
34
35#define k rdi // number of 64-bit words in x, y, m and z
36#define z rsi // output pointer
37#define x rdx // minuend pointer
38#define y rcx // subtrahend pointer
39#define m r8 // modulus pointer
40#define i r9 // word index into the arrays
41#define j r10 // down-counter in the first loop, saved carry in the second
42#define a rax // scratch word
43#define c r11 // select mask derived from the final borrow of x - y
44
45S2N_BN_SYMBOL(bignum_modsub):
46 _CET_ENDBR
47// Under WINDOWS_ABI, save rdi/rsi and move the Microsoft x64 arguments into the registers used below
48#if WINDOWS_ABI
49 push rdi
50 push rsi
51 mov rdi, rcx
52 mov rsi, rdx
53 mov rdx, r8
54 mov rcx, r9
55 mov r8, [rsp+56] // m is the stack argument: [rsp+40] at entry, plus 16 bytes for the two pushes above
56#endif
57
58// If k = 0 there are no words to process, so return without touching z
59
60 test k, k
61 jz bignum_modsub_end
62
63// Subtract z := x - y and record a mask for the carry x - y < 0
64// xor clears CF and inc/dec leave CF alone, so the sbb chain borrows from one iteration to the next
65 xor c, c // c = 0, CF = 0
66 mov j, k // j counts down the k words
67 xor i, i // i = 0, first word
68bignum_modsub_subloop:
69 mov a, [x+8*i]
70 sbb a, [y+8*i]
71 mov [z+8*i], a
72 inc i
73 dec j
74 jnz bignum_modsub_subloop
75 sbb c, c // c = -CF: all-ones when the subtraction borrowed (x < y), else 0
76
77// Now do a masked addition z := z + [c] * m
78// j is 0 on entry (the loop above counted it down), so the first neg j starts with no carry
79 xor i, i
80bignum_modsub_addloop:
81 mov a, [m+8*i]
82 and a, c // a = m[i] when the mask is set, else 0
83 neg j // CF = (j != 0): reload the carry saved by the previous iteration
84 adc [z+8*i], a
85 sbb j, j // j = -CF: stash the carry, because the cmp below overwrites CF
86 inc i
87 cmp i, k
88 jc bignum_modsub_addloop // continue while i < k (unsigned)
89
90bignum_modsub_end:
91#if WINDOWS_ABI
92 pop rsi
93 pop rdi
94#endif
95 ret
96
97#if defined(__linux__) && defined(__ELF__)
98.section .note.GNU-stack,"",%progbits
99#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
index a3552679a2..538cce9af7 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
@@ -1,3 +1,5 @@
1// $OpenBSD: bignum_mul.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,9 +18,8 @@
16// Multiply z := x * y 18// Multiply z := x * y
17// Inputs x[m], y[n]; output z[k] 19// Inputs x[m], y[n]; output z[k]
18// 20//
19// extern void bignum_mul 21// extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x,
20// (uint64_t k, uint64_t *z, 22// uint64_t n, const uint64_t *y);
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22// 23//
23// Does the "z := x * y" operation where x is m digits, y is n, result z is k. 24// Does the "z := x * y" operation where x is m digits, y is n, result z is k.
24// Truncates the result in general unless k >= m + n 25// Truncates the result in general unless k >= m + n
@@ -59,7 +60,7 @@
59 60
60 61
61S2N_BN_SYMBOL(bignum_mul): 62S2N_BN_SYMBOL(bignum_mul):
62 _CET_ENDBR 63 _CET_ENDBR
63 64
64#if WINDOWS_ABI 65#if WINDOWS_ABI
65 push rdi 66 push rdi
@@ -88,7 +89,7 @@ S2N_BN_SYMBOL(bignum_mul):
88// If we did a multiply-add variant, however, then we could 89// If we did a multiply-add variant, however, then we could
89 90
90 test p, p 91 test p, p
91 jz end 92 jz bignum_mul_end
92 93
93// Set initial 2-part sum to zero (we zero c inside the body) 94// Set initial 2-part sum to zero (we zero c inside the body)
94 95
@@ -99,7 +100,7 @@ S2N_BN_SYMBOL(bignum_mul):
99 100
100 xor k, k 101 xor k, k
101 102
102outerloop: 103bignum_mul_outerloop:
103 104
104// Zero our carry term first; we eventually want it and a zero is useful now 105// Zero our carry term first; we eventually want it and a zero is useful now
105// Set a = max 0 (k + 1 - n), i = min (k + 1) m 106// Set a = max 0 (k + 1 - n), i = min (k + 1) m
@@ -125,11 +126,11 @@ outerloop:
125 mov d, k 126 mov d, k
126 sub d, i 127 sub d, i
127 sub i, a 128 sub i, a
128 jbe innerend 129 jbe bignum_mul_innerend
129 lea x,[rcx+8*a] 130 lea x,[rcx+8*a]
130 lea y,[r9+8*d-8] 131 lea y,[r9+8*d-8]
131 132
132innerloop: 133bignum_mul_innerloop:
133 mov rax, [y+8*i] 134 mov rax, [y+8*i]
134 mul QWORD PTR [x] 135 mul QWORD PTR [x]
135 add x, 8 136 add x, 8
@@ -137,9 +138,9 @@ innerloop:
137 adc h, rdx 138 adc h, rdx
138 adc c, 0 139 adc c, 0
139 dec i 140 dec i
140 jnz innerloop 141 jnz bignum_mul_innerloop
141 142
142innerend: 143bignum_mul_innerend:
143 144
144 mov [z], l 145 mov [z], l
145 mov l, h 146 mov l, h
@@ -147,9 +148,9 @@ innerend:
147 add z, 8 148 add z, 8
148 149
149 cmp k, p 150 cmp k, p
150 jc outerloop 151 jc bignum_mul_outerloop
151 152
152end: 153bignum_mul_end:
153 pop r15 154 pop r15
154 pop r14 155 pop r14
155 pop r13 156 pop r13
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8.S
new file mode 100644
index 0000000000..d6ad514020
--- /dev/null
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8.S
@@ -0,0 +1,187 @@
1// $OpenBSD: bignum_mul_4_8.S,v 1.4 2025/08/12 10:23:40 jsing Exp $
2//
3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4//
5// Permission to use, copy, modify, and/or distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17// ----------------------------------------------------------------------------
18// Multiply z := x * y
19// Inputs x[4], y[4]; output z[8]
20//
21// extern void bignum_mul_4_8(uint64_t z[static 8], const uint64_t x[static 4],
22// const uint64_t y[static 4]);
23//
24// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
25// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
26// ----------------------------------------------------------------------------
27
28#include "s2n_bignum_internal.h"
29
30 .intel_syntax noprefix
31 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_4_8)
32 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_4_8)
33 .text
34
35// z and x are used in the registers they arrive in under the standard ABI (RDI, RSI)
36
37#define z rdi
38#define x rsi
39
40// y arrives in RDX, which mulx uses as its implicit multiplicand, so it is copied to RCX on entry
41
42#define y rcx
43
44// A register kept at zero; zeroe is its 32-bit view, used with xor to zero it and clear the flags
45
46#define zero rbp
47#define zeroe ebp
48
49// mulpadd arg1,arg2: multiply x[arg2] by rdx and add the 128-bit product into the rotating r8..r11 window
50// The .if ladder picks the register pair from (arg1 + arg2) mod 4, standing in for indexed names reg[i], reg[i+1]
51// Low half goes in on the CF chain (adcx), high half on the OF chain (adox); clobbers rax and rbx
52.macro mulpadd arg1,arg2
53 mulx rbx, rax, [x+8*\arg2] // rbx:rax = rdx * x[arg2], flags untouched
54.if ((\arg1 + \arg2) % 4 == 0)
55 adcx r8, rax
56 adox r9, rbx
57.elseif ((\arg1 + \arg2) % 4 == 1)
58 adcx r9, rax
59 adox r10, rbx
60.elseif ((\arg1 + \arg2) % 4 == 2)
61 adcx r10, rax
62 adox r11, rbx
63.elseif ((\arg1 + \arg2) % 4 == 3)
64 adcx r11, rax
65 adox r8, rbx
66.endif
67
68.endm
69
70
71// addrow arg1: add y[arg1] * x[0..3] into the register window and store the finished word z[arg1]
72// Expects y, x, z and the window r8..r11 as set up by the caller; clobbers rdx, rax, rbx and the flags
73.macro addrow arg1
74 mov rdx, [y+8*\arg1] // rdx = y[arg1], the implicit mulx multiplicand for this row
75 xor zeroe, zeroe // re-zero and clear both CF and OF before the two carry chains start
76
77 mulpadd \arg1, 0
78// Nothing further is added at position arg1 in this or later rows, so that word is final and is stored now
79.if (\arg1 % 4 == 0)
80 mov [z+8*\arg1],r8
81.elseif (\arg1 % 4 == 1)
82 mov [z+8*\arg1],r9
83.elseif (\arg1 % 4 == 2)
84 mov [z+8*\arg1],r10
85.elseif (\arg1 % 4 == 3)
86 mov [z+8*\arg1],r11
87.endif
88
89 mulpadd \arg1, 1
90 mulpadd \arg1, 2
91// x[3] term: the high half is written straight into the register just stored, which becomes the new top word; the two adds of zero fold in the final OF and CF carries
92.if (\arg1 % 4 == 0)
93 mulx r8, rax, [x+24]
94 adcx r11, rax
95 adox r8, zero
96 adcx r8, zero
97.elseif (\arg1 % 4 == 1)
98 mulx r9, rax, [x+24]
99 adcx r8, rax
100 adox r9, zero
101 adcx r9, zero
102.elseif (\arg1 % 4 == 2)
103 mulx r10, rax, [x+24]
104 adcx r9, rax
105 adox r10, zero
106 adcx r10, zero
107.elseif (\arg1 % 4 == 3)
108 mulx r11, rax, [x+24]
109 adcx r10, rax
110 adox r11, zero
111 adcx r11, zero
112.endif
113
114.endm
115
116
117
118S2N_BN_SYMBOL(bignum_mul_4_8):
119 _CET_ENDBR
120// NOTE(review): mulx/adcx/adox below are BMI2/ADX instructions; presumably callers check CPU support first - confirm
121#if WINDOWS_ABI
122 push rdi
123 push rsi
124 mov rdi, rcx
125 mov rsi, rdx
126 mov rdx, r8
127#endif
128
129// Save rbp and rbx (callee-saved); they serve below as the zero register and a scratch register
130
131 push rbp
132 push rbx
133
134// Copy y out of rdx, since mulx takes rdx as its implicit multiplicand
135
136 mov y, rdx
137
138// Zero a register, which also makes sure we don't get a fake carry-in
139
140 xor zeroe, zeroe
141
142// Do the zeroth row, which is a bit different
143// Write back the zero-zero product and then accumulate
144// so that r9,r10,r11,r8 hold words 1..4 of y[0] * x (word 0 is already in z[0])
145
146 mov rdx, [y]
147
148 mulx r9, r8, [x]
149 mov [z], r8
150
151 mulx r10, rbx, [x+8]
152 adcx r9, rbx
153
154 mulx r11, rbx, [x+16]
155 adcx r10, rbx
156
157 mulx r8, rbx, [x+24]
158 adcx r11, rbx
159 adcx r8, zero // fold the last carry into the top word
160
161// Now all the other rows in a uniform pattern; each addrow stores one more low word of z
162
163 addrow 1
164 addrow 2
165 addrow 3
166
167// Now write back the upper four words, z[4..7], left in the window after the last row
168
169 mov [z+32], r8
170 mov [z+40], r9
171 mov [z+48], r10
172 mov [z+56], r11
173
174// Restore registers (reverse of the push order) and return
175
176 pop rbx
177 pop rbp
178
179#if WINDOWS_ABI
180 pop rsi
181 pop rdi
182#endif
183 ret
184
185#if defined(__linux__) && defined(__ELF__)
186.section .note.GNU-stack,"",%progbits
187#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
index 70ff69e372..2592d1d658 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
@@ -1,3 +1,5 @@
1// $OpenBSD: bignum_mul_4_8_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,8 +18,8 @@
16// Multiply z := x * y 18// Multiply z := x * y
17// Inputs x[4], y[4]; output z[8] 19// Inputs x[4], y[4]; output z[8]
18// 20//
19// extern void bignum_mul_4_8_alt 21// extern void bignum_mul_4_8_alt(uint64_t z[static 8], const uint64_t x[static 4],
20// (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); 22// const uint64_t y[static 4]);
21// 23//
22// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y 24// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
23// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y 25// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
@@ -72,7 +74,7 @@
72 adc h, rdx 74 adc h, rdx
73 75
74S2N_BN_SYMBOL(bignum_mul_4_8_alt): 76S2N_BN_SYMBOL(bignum_mul_4_8_alt):
75 _CET_ENDBR 77 _CET_ENDBR
76 78
77#if WINDOWS_ABI 79#if WINDOWS_ABI
78 push rdi 80 push rdi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12.S
new file mode 100644
index 0000000000..56cbdf06e0
--- /dev/null
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12.S
@@ -0,0 +1,223 @@
1// $OpenBSD: bignum_mul_6_12.S,v 1.4 2025/08/12 10:23:40 jsing Exp $
2//
3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4//
5// Permission to use, copy, modify, and/or distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17// ----------------------------------------------------------------------------
18// Multiply z := x * y
19// Inputs x[6], y[6]; output z[12]
20//
21// extern void bignum_mul_6_12(uint64_t z[static 12], const uint64_t x[static 6],
22// const uint64_t y[static 6]);
23//
24// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
25// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
26// ----------------------------------------------------------------------------
27
28#include "s2n_bignum_internal.h"
29
30 .intel_syntax noprefix
31 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_6_12)
32 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_6_12)
33 .text
34
35// z and x stay in the registers the standard ABI passes them in (RDI, RSI)
36
37#define z rdi
38#define x rsi
39
40// Copied in or set up
41
42#define y rcx
43
44// A zero register
45
46#define zero rbp
47#define zeroe ebp
48
49// mulpadd arg1, arg2: add x[arg2] * rdx into the rotating register window.
50// The low word is added (adcx, CF chain) to slot (arg1+arg2) mod 6 of r8..r13
51// and the high word (adox, OF chain) to the following slot, wrapping to r8.
52.macro mulpadd arg1,arg2
53 mulx rbx, rax, [x+8*\arg2]
54.if ((\arg1 + \arg2) % 6 == 0)
55 adcx r8, rax
56 adox r9, rbx
57.elseif ((\arg1 + \arg2) % 6 == 1)
58 adcx r9, rax
59 adox r10, rbx
60.elseif ((\arg1 + \arg2) % 6 == 2)
61 adcx r10, rax
62 adox r11, rbx
63.elseif ((\arg1 + \arg2) % 6 == 3)
64 adcx r11, rax
65 adox r12, rbx
66.elseif ((\arg1 + \arg2) % 6 == 4)
67 adcx r12, rax
68 adox r13, rbx
69.elseif ((\arg1 + \arg2) % 6 == 5)
70 adcx r13, rax
71 adox r8, rbx
72.endif
73// The .if ladder stands in for indexed names reg[i], reg[i+1]; clobbers rax, rbx
74.endm
75
76
77// addrow j: add the whole row y[j] * x[0..5] into the window, store the
78// finished word z[j], and rebuild its slot as the new top word of the window.
79.macro addrow arg1
80 mov rdx, [y+8*\arg1]
81 xor zeroe, zeroe
82// The xor re-zeroes rbp and clears CF and OF, so both carry chains start clean
83 mulpadd \arg1, 0
84// Slot j mod 6 has now received its last contribution: write it out as z[j]
85.if (\arg1 % 6 == 0)
86 mov [z+8*\arg1],r8
87.elseif (\arg1 % 6 == 1)
88 mov [z+8*\arg1],r9
89.elseif (\arg1 % 6 == 2)
90 mov [z+8*\arg1],r10
91.elseif (\arg1 % 6 == 3)
92 mov [z+8*\arg1],r11
93.elseif (\arg1 % 6 == 4)
94 mov [z+8*\arg1],r12
95.elseif (\arg1 % 6 == 5)
96 mov [z+8*\arg1],r13
97.endif
98// Middle products x[1..4] * y[j]
99 mulpadd \arg1, 1
100 mulpadd \arg1, 2
101 mulpadd \arg1, 3
102 mulpadd \arg1, 4
103// Top product x[5] * y[j]: high word goes into the freed slot, then OF and CF are absorbed
104.if (\arg1 % 6 == 0)
105 mulx r8, rax, [x+40]
106 adcx r13, rax
107 adox r8, zero
108 adcx r8, zero
109.elseif (\arg1 % 6 == 1)
110 mulx r9, rax, [x+40]
111 adcx r8, rax
112 adox r9, zero
113 adcx r9, zero
114.elseif (\arg1 % 6 == 2)
115 mulx r10, rax, [x+40]
116 adcx r9, rax
117 adox r10, zero
118 adcx r10, zero
119.elseif (\arg1 % 6 == 3)
120 mulx r11, rax, [x+40]
121 adcx r10, rax
122 adox r11, zero
123 adcx r11, zero
124.elseif (\arg1 % 6 == 4)
125 mulx r12, rax, [x+40]
126 adcx r11, rax
127 adox r12, zero
128 adcx r12, zero
129.elseif (\arg1 % 6 == 5)
130 mulx r13, rax, [x+40]
131 adcx r12, rax
132 adox r13, zero
133 adcx r13, zero
134.endif
135
136.endm
137
138// bignum_mul_6_12: z[0..11] := x[0..5] * y[0..5], computed one row y[j] * x at
139// a time with the partial sums held in the rotating window r8..r13.
140S2N_BN_SYMBOL(bignum_mul_6_12):
141 _CET_ENDBR
142
143#if WINDOWS_ABI
144 push rdi
145 push rsi
146 mov rdi, rcx
147 mov rsi, rdx
148 mov rdx, r8
149#endif
150
151// Save the callee-saved registers used as zero (rbp), scratch (rbx) and window (r12, r13)
152
153 push rbp
154 push rbx
155 push r12
156 push r13
157
158// Move y out of rdx, which every mulx below uses as its implicit multiplicand
159
160 mov y, rdx
161
162// Zero a register, which also makes sure we don't get a fake carry-in
163
164 xor zeroe, zeroe
165
166// Do the zeroth row, which is a bit different
167// Write back the zero-zero product and then accumulate
168// words 1..6 of y[0] * x in r9,r10,r11,r12,r13,r8 respectively
169
170 mov rdx, [y]
171
172 mulx r9, r8, [x]
173 mov [z], r8
174
175 mulx r10, rbx, [x+8]
176 adcx r9, rbx
177
178 mulx r11, rbx, [x+16]
179 adcx r10, rbx
180
181 mulx r12, rbx, [x+24]
182 adcx r11, rbx
183
184 mulx r13, rbx, [x+32]
185 adcx r12, rbx
186
187 mulx r8, rbx, [x+40]
188 adcx r13, rbx
189 adcx r8, zero
190
191// Now all the other rows in a uniform pattern
192
193 addrow 1
194 addrow 2
195 addrow 3
196 addrow 4
197 addrow 5
198
199// The window now holds result words 6..11 in r8..r13: write them back
200
201 mov [z+48], r8
202 mov [z+56], r9
203 mov [z+64], r10
204 mov [z+72], r11
205 mov [z+80], r12
206 mov [z+88], r13
207
208// Restore registers and return
209
210 pop r13
211 pop r12
212 pop rbx
213 pop rbp
214
215#if WINDOWS_ABI
216 pop rsi
217 pop rdi
218#endif
219 ret
220
221#if defined(__linux__) && defined(__ELF__)
222.section .note.GNU-stack,"",%progbits
223#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12_alt.S
new file mode 100644
index 0000000000..077c52b38e
--- /dev/null
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12_alt.S
@@ -0,0 +1,199 @@
1// $OpenBSD: bignum_mul_6_12_alt.S,v 1.4 2025/08/12 10:23:40 jsing Exp $
2//
3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4//
5// Permission to use, copy, modify, and/or distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17// ----------------------------------------------------------------------------
18// Multiply z := x * y
19// Inputs x[6], y[6]; output z[12]
20//
21// extern void bignum_mul_6_12_alt(uint64_t z[static 12],
22// const uint64_t x[static 6],
23// const uint64_t y[static 6]);
24//
25// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
26// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
27// ----------------------------------------------------------------------------
28
29#include "s2n_bignum_internal.h"
30
31 .intel_syntax noprefix
32 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_6_12_alt)
33 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_6_12_alt)
34 .text
35
36// z and x stay in the registers the standard ABI passes them in (RDI, RSI)
37
38#define z rdi
39#define x rsi
40
41// This is moved from rdx to free it for muls
42
43#define y rcx
44
45// Other variables used as a rotating 3-word window to add terms to
46
47#define t0 r8
48#define t1 r9
49#define t2 r10
50
51// combadd(c,h,l,numa,numb): three-word accumulate, (c,h,l) += numa * numb.
52// Uses mul, so rax and rdx are clobbered; numb is used as a QWORD memory operand.
53#define combadd(c,h,l,numa,numb) \
54 mov rax, numa; \
55 mul QWORD PTR numb; \
56 add l, rax; \
57 adc h, rdx; \
58 adc c, 0
59
60// A minutely shorter form of combadd for when c = 0 initially:
61// with c = 0 the final "adc c, c" simply leaves c = carry out of h.
62#define combadz(c,h,l,numa,numb) \
63 mov rax, numa; \
64 mul QWORD PTR numb; \
65 add l, rax; \
66 adc h, rdx; \
67 adc c, c
68
69// A short two-word form, (h,l) += numa * numb, where we don't expect a top carry
70// (any carry out of h is not recorded); clobbers rax and rdx like combadd.
71#define combads(h,l,numa,numb) \
72 mov rax, numa; \
73 mul QWORD PTR numb; \
74 add l, rax; \
75 adc h, rdx
76// bignum_mul_6_12_alt: z[0..11] := x[0..5] * y[0..5], column by column, using only mul/add/adc
77S2N_BN_SYMBOL(bignum_mul_6_12_alt):
78 _CET_ENDBR
79
80#if WINDOWS_ABI
81 push rdi
82 push rsi
83 mov rdi, rcx
84 mov rsi, rdx
85 mov rdx, r8
86#endif
87
88// Move y out of rdx, which mul overwrites with the high product word
89
90 mov y, rdx
91
92// Result term 0: x[0]*y[0]; low word is z[0], high word seeds t0
93
94 mov rax, [x]
95 mul QWORD PTR [y]
96
97 mov [z], rax
98 mov t0, rdx
99 xor t1, t1
100// The (c,h,l) roles rotate through t0,t1,t2 from one result term to the next
101// Result term 1: each term k sums every x[i]*y[j] with i + j = k into the window
102
103 xor t2, t2
104 combads(t1,t0,[x],[y+8])
105 combadz(t2,t1,t0,[x+8],[y])
106 mov [z+8], t0
107
108// Result term 2
109
110 xor t0, t0
111 combadz(t0,t2,t1,[x],[y+16])
112 combadd(t0,t2,t1,[x+8],[y+8])
113 combadd(t0,t2,t1,[x+16],[y])
114 mov [z+16], t1
115
116// Result term 3
117
118 xor t1, t1
119 combadz(t1,t0,t2,[x],[y+24])
120 combadd(t1,t0,t2,[x+8],[y+16])
121 combadd(t1,t0,t2,[x+16],[y+8])
122 combadd(t1,t0,t2,[x+24],[y])
123 mov [z+24], t2
124
125// Result term 4
126
127 xor t2, t2
128 combadz(t2,t1,t0,[x],[y+32])
129 combadd(t2,t1,t0,[x+8],[y+24])
130 combadd(t2,t1,t0,[x+16],[y+16])
131 combadd(t2,t1,t0,[x+24],[y+8])
132 combadd(t2,t1,t0,[x+32],[y])
133 mov [z+32], t0
134
135// Result term 5
136
137 xor t0, t0
138 combadz(t0,t2,t1,[x],[y+40])
139 combadd(t0,t2,t1,[x+8],[y+32])
140 combadd(t0,t2,t1,[x+16],[y+24])
141 combadd(t0,t2,t1,[x+24],[y+16])
142 combadd(t0,t2,t1,[x+32],[y+8])
143 combadd(t0,t2,t1,[x+40],[y])
144 mov [z+40], t1
145
146// Result term 6
147
148 xor t1, t1
149 combadz(t1,t0,t2,[x+8],[y+40])
150 combadd(t1,t0,t2,[x+16],[y+32])
151 combadd(t1,t0,t2,[x+24],[y+24])
152 combadd(t1,t0,t2,[x+32],[y+16])
153 combadd(t1,t0,t2,[x+40],[y+8])
154 mov [z+48], t2
155
156// Result term 7
157
158 xor t2, t2
159 combadz(t2,t1,t0,[x+16],[y+40])
160 combadd(t2,t1,t0,[x+24],[y+32])
161 combadd(t2,t1,t0,[x+32],[y+24])
162 combadd(t2,t1,t0,[x+40],[y+16])
163 mov [z+56], t0
164
165// Result term 8
166
167 xor t0, t0
168 combadz(t0,t2,t1,[x+24],[y+40])
169 combadd(t0,t2,t1,[x+32],[y+32])
170 combadd(t0,t2,t1,[x+40],[y+24])
171 mov [z+64], t1
172
173// Result term 9
174
175 xor t1, t1
176 combadz(t1,t0,t2,[x+32],[y+40])
177 combadd(t1,t0,t2,[x+40],[y+32])
178 mov [z+72], t2
179
180// Result term 10: the single product x[5]*y[5], added with the two-word form
181
182 combads(t1,t0,[x+40],[y+40])
183 mov [z+80], t0
184
185// Result term 11: the remaining high word
186
187 mov [z+88], t1
188
189// Return (besides the Windows-only rdi/rsi, only rax, rcx, rdx, r8-r10 were modified)
190
191#if WINDOWS_ABI
192 pop rsi
193 pop rdi
194#endif
195 ret
196
197#if defined(__linux__) && defined(__ELF__)
198.section .note.GNU-stack,"",%progbits
199#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16.S
new file mode 100644
index 0000000000..faa0196d8e
--- /dev/null
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16.S
@@ -0,0 +1,273 @@
1// $OpenBSD: bignum_mul_8_16.S,v 1.4 2025/08/12 10:23:40 jsing Exp $
2//
3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4//
5// Permission to use, copy, modify, and/or distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17// ----------------------------------------------------------------------------
18// Multiply z := x * y
19// Inputs x[8], y[8]; output z[16]
20//
21// extern void bignum_mul_8_16(uint64_t z[static 16], const uint64_t x[static 8],
22// const uint64_t y[static 8]);
23//
24// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
25// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
26// ----------------------------------------------------------------------------
27
28#include "s2n_bignum_internal.h"
29
30 .intel_syntax noprefix
31 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_8_16)
32 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_8_16)
33 .text
34
35// z and x stay in the registers the standard ABI passes them in (RDI, RSI)
36
37#define z rdi
38#define x rsi
39
40// Copied in or set up
41
42#define y rcx
43
44// A zero register
45
46#define zero rbp
47#define zeroe ebp
48
49// mulpadd i, j adds x[i] * rdx (now assumed = y[j]) into the window at i+j
50// Low word: adcx (CF chain) into slot (i+j) mod 8 of r8..r15; high word: adox (OF chain) into the next slot
51.macro mulpadd arg1,arg2
52 mulx rbx, rax, [x+8*\arg1]
53.if ((\arg1 + \arg2) % 8 == 0)
54 adcx r8, rax
55 adox r9, rbx
56.elseif ((\arg1 + \arg2) % 8 == 1)
57 adcx r9, rax
58 adox r10, rbx
59.elseif ((\arg1 + \arg2) % 8 == 2)
60 adcx r10, rax
61 adox r11, rbx
62.elseif ((\arg1 + \arg2) % 8 == 3)
63 adcx r11, rax
64 adox r12, rbx
65.elseif ((\arg1 + \arg2) % 8 == 4)
66 adcx r12, rax
67 adox r13, rbx
68.elseif ((\arg1 + \arg2) % 8 == 5)
69 adcx r13, rax
70 adox r14, rbx
71.elseif ((\arg1 + \arg2) % 8 == 6)
72 adcx r14, rax
73 adox r15, rbx
74.elseif ((\arg1 + \arg2) % 8 == 7)
75 adcx r15, rax
76 adox r8, rbx
77.endif
78// Clobbers rax and rbx; CF and OF carry on into the next step
79.endm
80
81// mulpade i, j adds x[i] * rdx (now assumed = y[j]) into the window at i+j
82// but re-creates the top word assuming nothing to add there
83// (mulx writes the high word straight over the next slot, then adox adds only the pending OF carry)
84.macro mulpade arg1,arg2
85.if ((\arg1 + \arg2) % 8 == 0)
86 mulx r9, rax, [x+8*\arg1]
87 adcx r8, rax
88 adox r9, zero
89.elseif ((\arg1 + \arg2) % 8 == 1)
90 mulx r10, rax, [x+8*\arg1]
91 adcx r9, rax
92 adox r10, zero
93.elseif ((\arg1 + \arg2) % 8 == 2)
94 mulx r11, rax, [x+8*\arg1]
95 adcx r10, rax
96 adox r11, zero
97.elseif ((\arg1 + \arg2) % 8 == 3)
98 mulx r12, rax, [x+8*\arg1]
99 adcx r11, rax
100 adox r12, zero
101.elseif ((\arg1 + \arg2) % 8 == 4)
102 mulx r13, rax, [x+8*\arg1]
103 adcx r12, rax
104 adox r13, zero
105.elseif ((\arg1 + \arg2) % 8 == 5)
106 mulx r14, rax, [x+8*\arg1]
107 adcx r13, rax
108 adox r14, zero
109.elseif ((\arg1 + \arg2) % 8 == 6)
110 mulx r15, rax, [x+8*\arg1]
111 adcx r14, rax
112 adox r15, zero
113.elseif ((\arg1 + \arg2) % 8 == 7)
114 mulx r8, rax, [x+8*\arg1]
115 adcx r15, rax
116 adox r8, zero
117.endif
118// CF from the adcx is still pending on exit; addrow absorbs it with its final adc
119.endm
120
121// addrow j: add the whole row y[j] * x[0..7] into the window, store the
122// finished word z[j], and rebuild its slot as the new top word of the window.
123.macro addrow arg1
124 mov rdx, [y+8*\arg1]
125 xor zeroe, zeroe
126// The xor re-zeroes rbp and clears CF and OF, so both carry chains start clean
127 mulpadd 0, \arg1
128// Slot j mod 8 has now received its last contribution: write it out as z[j]
129.if (\arg1 % 8 == 0)
130 mov [z+8*\arg1],r8
131.elseif (\arg1 % 8 == 1)
132 mov [z+8*\arg1],r9
133.elseif (\arg1 % 8 == 2)
134 mov [z+8*\arg1],r10
135.elseif (\arg1 % 8 == 3)
136 mov [z+8*\arg1],r11
137.elseif (\arg1 % 8 == 4)
138 mov [z+8*\arg1],r12
139.elseif (\arg1 % 8 == 5)
140 mov [z+8*\arg1],r13
141.elseif (\arg1 % 8 == 6)
142 mov [z+8*\arg1],r14
143.elseif (\arg1 % 8 == 7)
144 mov [z+8*\arg1],r15
145.endif
146// Remaining products x[1..7] * y[j]; the last one (mulpade) re-creates the freed slot
147 mulpadd 1, \arg1
148 mulpadd 2, \arg1
149 mulpadd 3, \arg1
150 mulpadd 4, \arg1
151 mulpadd 5, \arg1
152 mulpadd 6, \arg1
153 mulpade 7, \arg1
154// Absorb the CF carry left pending by mulpade into the new top word
155.if (\arg1 % 8 == 0)
156 adc r8, zero
157.elseif (\arg1 % 8 == 1)
158 adc r9, zero
159.elseif (\arg1 % 8 == 2)
160 adc r10, zero
161.elseif (\arg1 % 8 == 3)
162 adc r11, zero
163.elseif (\arg1 % 8 == 4)
164 adc r12, zero
165.elseif (\arg1 % 8 == 5)
166 adc r13, zero
167.elseif (\arg1 % 8 == 6)
168 adc r14, zero
169.elseif (\arg1 % 8 == 7)
170 adc r15, zero
171.endif
172
173.endm
174// bignum_mul_8_16: z[0..15] := x[0..7] * y[0..7], computed one row y[j] * x at
175// a time with the partial sums held in the rotating window r8..r15.
176S2N_BN_SYMBOL(bignum_mul_8_16):
177 _CET_ENDBR
178
179#if WINDOWS_ABI
180 push rdi
181 push rsi
182 mov rdi, rcx
183 mov rsi, rdx
184 mov rdx, r8
185#endif
186
187// Save the callee-saved registers used as zero (rbp), scratch (rbx) and window (r12..r15)
188
189 push rbp
190 push rbx
191 push r12
192 push r13
193 push r14
194 push r15
195
196// Move y out of rdx, which every mulx below uses as its implicit multiplicand
197
198 mov y, rdx
199
200// Zero a register, which also makes sure we don't get a fake carry-in
201
202 xor zeroe, zeroe
203
204// Do the zeroth row, which is a bit different (a single adc carry chain)
205// Write back the zero-zero product and then accumulate
206// words 1..8 of y[0] * x in r9,r10,r11,r12,r13,r14,r15,r8 respectively
207
208 mov rdx, [y]
209
210 mulx r9, r8, [x]
211 mov [z], r8
212
213 mulx r10, rbx, [x+8]
214 adc r9, rbx
215
216 mulx r11, rbx, [x+16]
217 adc r10, rbx
218
219 mulx r12, rbx, [x+24]
220 adc r11, rbx
221
222 mulx r13, rbx, [x+32]
223 adc r12, rbx
224
225 mulx r14, rbx, [x+40]
226 adc r13, rbx
227
228 mulx r15, rbx, [x+48]
229 adc r14, rbx
230
231 mulx r8, rbx, [x+56]
232 adc r15, rbx
233 adc r8, zero
234
235// Now all the other rows in a uniform pattern
236
237 addrow 1
238 addrow 2
239 addrow 3
240 addrow 4
241 addrow 5
242 addrow 6
243 addrow 7
244
245// The window now holds result words 8..15 in r8..r15: write them back
246
247 mov [z+64], r8
248 mov [z+72], r9
249 mov [z+80], r10
250 mov [z+88], r11
251 mov [z+96], r12
252 mov [z+104], r13
253 mov [z+112], r14
254 mov [z+120], r15
255
256// Restore callee-saved registers (reverse of the push order) and return
257
258 pop r15
259 pop r14
260 pop r13
261 pop r12
262 pop rbx
263 pop rbp
264
265#if WINDOWS_ABI
266 pop rsi
267 pop rdi
268#endif
269 ret
270
271#if defined(__linux__) && defined(__ELF__)
272.section .note.GNU-stack,"",%progbits
273#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
index 066403b074..0e30b9170f 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
@@ -1,3 +1,5 @@
1// $OpenBSD: bignum_mul_8_16_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,8 +18,9 @@
16// Multiply z := x * y 18// Multiply z := x * y
17// Inputs x[8], y[8]; output z[16] 19// Inputs x[8], y[8]; output z[16]
18// 20//
19// extern void bignum_mul_8_16_alt 21// extern void bignum_mul_8_16_alt(uint64_t z[static 16],
20// (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); 22// const uint64_t x[static 8],
23// const uint64_t y[static 8]);
21// 24//
22// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y 25// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
23// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y 26// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
@@ -72,7 +75,7 @@
72 adc h, rdx 75 adc h, rdx
73 76
74S2N_BN_SYMBOL(bignum_mul_8_16_alt): 77S2N_BN_SYMBOL(bignum_mul_8_16_alt):
75 _CET_ENDBR 78 _CET_ENDBR
76 79
77#if WINDOWS_ABI 80#if WINDOWS_ABI
78 push rdi 81 push rdi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
index 54e3f59442..86f1af2ac4 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
@@ -1,3 +1,5 @@
1// $OpenBSD: bignum_sqr.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,8 +18,7 @@
16// Square z := x^2 18// Square z := x^2
17// Input x[n]; output z[k] 19// Input x[n]; output z[k]
18// 20//
19// extern void bignum_sqr 21// extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x);
20// (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x);
21// 22//
22// Does the "z := x^2" operation where x is n digits and result z is k. 23// Does the "z := x^2" operation where x is n digits and result z is k.
23// Truncates the result in general unless k >= 2 * n 24// Truncates the result in general unless k >= 2 * n
@@ -62,7 +63,7 @@
62#define llshort ebp 63#define llshort ebp
63 64
64S2N_BN_SYMBOL(bignum_sqr): 65S2N_BN_SYMBOL(bignum_sqr):
65 _CET_ENDBR 66 _CET_ENDBR
66 67
67#if WINDOWS_ABI 68#if WINDOWS_ABI
68 push rdi 69 push rdi
@@ -86,7 +87,7 @@ S2N_BN_SYMBOL(bignum_sqr):
86// If p = 0 the result is trivial and nothing needs doing 87// If p = 0 the result is trivial and nothing needs doing
87 88
88 test p, p 89 test p, p
89 jz end 90 jz bignum_sqr_end
90 91
91// initialize (hh,ll) = 0 92// initialize (hh,ll) = 0
92 93
@@ -97,7 +98,7 @@ S2N_BN_SYMBOL(bignum_sqr):
97 98
98 xor k, k 99 xor k, k
99 100
100outerloop: 101bignum_sqr_outerloop:
101 102
102// First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n 103// First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n
103// We want to accumulate all x[i] * x[k - i] for bot <= i < top 104// We want to accumulate all x[i] * x[k - i] for bot <= i < top
@@ -122,7 +123,7 @@ outerloop:
122// If htop <= bot then main doubled part of the sum is empty 123// If htop <= bot then main doubled part of the sum is empty
123 124
124 cmp i, htop 125 cmp i, htop
125 jnc nosumming 126 jnc bignum_sqr_nosumming
126 127
127// Use a moving pointer for [y] = x[k-i] for the cofactor 128// Use a moving pointer for [y] = x[k-i] for the cofactor
128 129
@@ -132,7 +133,7 @@ outerloop:
132 133
133// Do the main part of the sum x[i] * x[k - i] for 2 * i < k 134// Do the main part of the sum x[i] * x[k - i] for 2 * i < k
134 135
135innerloop: 136bignum_sqr_innerloop:
136 mov a, [x+8*i] 137 mov a, [x+8*i]
137 mul QWORD PTR [y] 138 mul QWORD PTR [y]
138 add l, a 139 add l, a
@@ -141,7 +142,7 @@ innerloop:
141 sub y, 8 142 sub y, 8
142 inc i 143 inc i
143 cmp i, htop 144 cmp i, htop
144 jc innerloop 145 jc bignum_sqr_innerloop
145 146
146// Now double it 147// Now double it
147 148
@@ -151,11 +152,11 @@ innerloop:
151 152
152// If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term 153// If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term
153 154
154nosumming: 155bignum_sqr_nosumming:
155 test k, 1 156 test k, 1
156 jnz innerend 157 jnz bignum_sqr_innerend
157 cmp i, n 158 cmp i, n
158 jnc innerend 159 jnc bignum_sqr_innerend
159 160
160 mov a, [x+8*i] 161 mov a, [x+8*i]
161 mul a 162 mul a
@@ -165,7 +166,7 @@ nosumming:
165 166
166// Now add the local sum into the global sum, store and shift 167// Now add the local sum into the global sum, store and shift
167 168
168innerend: 169bignum_sqr_innerend:
169 add l, ll 170 add l, ll
170 mov [z+8*k], l 171 mov [z+8*k], l
171 adc h, hh 172 adc h, hh
@@ -175,11 +176,11 @@ innerend:
175 176
176 inc k 177 inc k
177 cmp k, p 178 cmp k, p
178 jc outerloop 179 jc bignum_sqr_outerloop
179 180
180// Restore registers and return 181// Restore registers and return
181 182
182end: 183bignum_sqr_end:
183 pop r15 184 pop r15
184 pop r14 185 pop r14
185 pop r13 186 pop r13
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8.S
new file mode 100644
index 0000000000..25664782f7
--- /dev/null
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8.S
@@ -0,0 +1,158 @@
1// $OpenBSD: bignum_sqr_4_8.S,v 1.4 2025/08/12 10:23:40 jsing Exp $
2//
3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4//
5// Permission to use, copy, modify, and/or distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17// ----------------------------------------------------------------------------
18// Square, z := x^2
19// Input x[4]; output z[8]
20//
21// extern void bignum_sqr_4_8(uint64_t z[static 8], const uint64_t x[static 4]);
22//
23// Standard x86-64 ABI: RDI = z, RSI = x
24// Microsoft x64 ABI: RCX = z, RDX = x
25// ----------------------------------------------------------------------------
26
27#include "s2n_bignum_internal.h"
28
29 .intel_syntax noprefix
30 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_4_8)
31 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_4_8)
32 .text
33
34// z and x stay in the registers the standard ABI passes them in (RDI, RSI)
35
36#define z rdi
37#define x rsi
38
39// A zero register
40
41#define zero rbp
42#define zeroe ebp
43
44// Other registers
45
46#define d1 r8
47#define d2 r9
48#define d3 r10
49#define d4 r11
50#define d5 r12
51#define d6 r13
52
53// bignum_sqr_4_8: z[0..7] := x[0..3]^2. The cross products x[i]*x[j] (i < j) are
54// summed once in d1..d6, then doubled while the squares x[i]^2 are added in.
55S2N_BN_SYMBOL(bignum_sqr_4_8):
56 _CET_ENDBR
57
58#if WINDOWS_ABI
59 push rdi
60 push rsi
61 mov rdi, rcx
62 mov rsi, rdx
63#endif
64
65// Save the callee-saved registers used below (rbp as zero, r12 and r13 as d5, d6)
66
67 push rbp
68 push r12
69 push r13
70
71// Set up an initial window [d6;...d1] = [23;03;01]
72// i.e. (d2,d1) = x[0]*x[1], (d4,d3) = x[0]*x[3], (d6,d5) = x[2]*x[3]
73 mov rdx, [x]
74 mulx d2, d1, [x+8]
75 mulx d4, d3, [x+24]
76 mov rdx, [x+16]
77 mulx d6, d5, [x+24]
78
79// Clear our zero register, and also initialize the flags for the carry chain
80
81 xor zeroe, zeroe
82
83// Chain in the addition of 02 + 12 + 13 to that window (no carry-out possible)
84// This gives all the "heterogeneous" terms of the squaring ready to double
85// (rdx still holds x[2] for the first two products; low words use CF, high words OF)
86 mulx rcx, rax, [x]
87 adcx d2, rax
88 adox d3, rcx
89 mulx rcx, rax, [x+8]
90 adcx d3, rax
91 adox d4, rcx
92 mov rdx, [x+24]
93 mulx rcx, rax, [x+8]
94 adcx d4, rax
95 adox d5, rcx
96 adcx d5, zero
97 adox d6, zero
98 adcx d6, zero
99
100// In principle this is otiose as CF and OF carries are absorbed at this point
101// However it seems helpful for the OOO engine to be told it's a fresh start
102
103 xor zeroe, zeroe
104
105// Double and add to the 00 + 11 + 22 + 33 terms
106//
107// We could use shift-double but this seems tidier and in larger squarings
108// it was actually more efficient. I haven't experimented with this small
109// case to see how much that matters. Note: the writeback here is sprinkled
110// into the sequence in such a way that things still work if z = x, i.e. if
111// the output overwrites the input buffer and beyond.
112// Each "adcx d,d" doubles a word on the CF chain; each adox adds a square word on the OF chain
113 mov rdx, [x]
114 mulx rdx, rax, rdx
115 mov [z], rax
116 adcx d1, d1
117 adox d1, rdx
118 mov rdx, [x+8]
119 mov [z+8], d1
120 mulx rdx, rax, rdx
121 adcx d2, d2
122 adox d2, rax
123 adcx d3, d3
124 adox d3, rdx
125 mov rdx, [x+16]
126 mov [z+16], d2
127 mulx rdx, rax, rdx
128 adcx d4, d4
129 adox d4, rax
130 adcx d5, d5
131 adox d5, rdx
132 mov rdx, [x+24]
133 mov [z+24], d3
134 mulx rdx, rax, rdx
135 mov [z+32], d4
136 adcx d6, d6
137 mov [z+40], d5
138 adox d6, rax
139 mov [z+48], d6
140 adcx rdx, zero
141 adox rdx, zero
142 mov [z+56], rdx
143
144// Restore saved registers and return
145
146 pop r13
147 pop r12
148 pop rbp
149
150#if WINDOWS_ABI
151 pop rsi
152 pop rdi
153#endif
154 ret
155
156#if defined(__linux__) && defined(__ELF__)
157.section .note.GNU-stack,"",%progbits
158#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
index 7c534ae907..7eafac3284 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
@@ -1,3 +1,5 @@
1// $OpenBSD: bignum_sqr_4_8_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,8 +18,8 @@
16// Square, z := x^2 18// Square, z := x^2
17// Input x[4]; output z[8] 19// Input x[4]; output z[8]
18// 20//
19// extern void bignum_sqr_4_8_alt 21// extern void bignum_sqr_4_8_alt(uint64_t z[static 8],
20// (uint64_t z[static 8], uint64_t x[static 4]); 22// const uint64_t x[static 4]);
21// 23//
22// Standard x86-64 ABI: RDI = z, RSI = x 24// Standard x86-64 ABI: RDI = z, RSI = x
23// Microsoft x64 ABI: RCX = z, RDX = x 25// Microsoft x64 ABI: RCX = z, RDX = x
@@ -71,7 +73,7 @@
71 adc c, 0 73 adc c, 0
72 74
73S2N_BN_SYMBOL(bignum_sqr_4_8_alt): 75S2N_BN_SYMBOL(bignum_sqr_4_8_alt):
74 _CET_ENDBR 76 _CET_ENDBR
75 77
76#if WINDOWS_ABI 78#if WINDOWS_ABI
77 push rdi 79 push rdi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12.S
new file mode 100644
index 0000000000..3f055e8b75
--- /dev/null
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12.S
@@ -0,0 +1,227 @@
1// $OpenBSD: bignum_sqr_6_12.S,v 1.4 2025/08/12 10:23:40 jsing Exp $
2//
3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4//
5// Permission to use, copy, modify, and/or distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17// ----------------------------------------------------------------------------
18// Square, z := x^2
19// Input x[6]; output z[12]
20//
21// extern void bignum_sqr_6_12(uint64_t z[static 12], const uint64_t x[static 6]);
22//
23// Standard x86-64 ABI: RDI = z, RSI = x
24// Microsoft x64 ABI: RCX = z, RDX = x
25// ----------------------------------------------------------------------------
26
27#include "s2n_bignum_internal.h"
28
29 .intel_syntax noprefix
30 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_6_12)
31 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_6_12)
32 .text
33
34// z and x stay in the registers the standard ABI passes them in (RDI, RSI)
35
36#define z rdi
37#define x rsi
38
39// A zero register
40
41#define zero rbp
42#define zeroe ebp
43
44// Other registers
45
46#define d1 r8
47#define d2 r9
48#define d3 r10
49#define d4 r11
50#define d5 r12
51#define d6 r13
52#define d7 r14
53#define d8 r15
54#define d9 rbx
55
56// Care is needed: re-using the zero register
57
58#define d10 rbp
59// bignum_sqr_6_12: z[0..11] := x[0..5]^2. The cross products x[i]*x[j] (i < j) are
60// summed once in d1..d10, then doubled while the squares x[i]^2 are added in.
61S2N_BN_SYMBOL(bignum_sqr_6_12):
62 _CET_ENDBR
63
64#if WINDOWS_ABI
65 push rdi
66 push rsi
67 mov rdi, rcx
68 mov rsi, rdx
69#endif
70
71// Save the callee-saved registers used below (rbp, rbx, r12..r15)
72
73 push rbp
74 push rbx
75 push r12
76 push r13
77 push r14
78 push r15
79
80// Set up an initial window [d8;...d1] = [34;05;03;01]
81// i.e. (d2,d1) = x[0]*x[1], (d4,d3) = x[0]*x[3], (d6,d5) = x[0]*x[5], (d8,d7) = x[3]*x[4]
82 mov rdx, [x]
83 mulx d2, d1, [x+8]
84 mulx d4, d3, [x+24]
85 mulx d6, d5, [x+40]
86 mov rdx, [x+24]
87 mulx d8, d7, [x+32]
88
89// Clear our zero register, and also initialize the flags for the carry chain
90
91 xor zeroe, zeroe
92
93// Chain in the addition of 02 + 12 + 13 + 14 + 15 to that window
94// (no carry-out possible since we add it to the top of a product)
95
96 mov rdx, [x+16]
97 mulx rcx, rax, [x]
98 adcx d2, rax
99 adox d3, rcx
100 mulx rcx, rax, [x+8]
101 adcx d3, rax
102 adox d4, rcx
103 mov rdx, [x+8]
104 mulx rcx, rax, [x+24]
105 adcx d4, rax
106 adox d5, rcx
107 mulx rcx, rax, [x+32]
108 adcx d5, rax
109 adox d6, rcx
110 mulx rcx, rax, [x+40]
111 adcx d6, rax
112 adox d7, rcx
113 adcx d7, zero
114 adox d8, zero
115 adcx d8, zero
116
117// Again zero out the flags. Actually they are already cleared but it may
118// help decouple these in the OOO engine not to wait for the chain above
119
120 xor zeroe, zeroe
121
122// Now chain in the 04 + 23 + 24 + 25 + 35 + 45 terms
123// We are running out of registers and here our zero register is not zero!
124// (mulx overwrites rbp as d10 below; rax is then set to 0 with mov, which leaves CF/OF intact)
125 mov rdx, [x+32]
126 mulx rcx, rax, [x]
127 adcx d4, rax
128 adox d5, rcx
129 mov rdx, [x+16]
130 mulx rcx, rax, [x+24]
131 adcx d5, rax
132 adox d6, rcx
133 mulx rcx, rax, [x+32]
134 adcx d6, rax
135 adox d7, rcx
136 mulx rcx, rax, [x+40]
137 adcx d7, rax
138 adox d8, rcx
139 mov rdx, [x+24]
140 mulx d9, rax, [x+40]
141 adcx d8, rax
142 adox d9, zero
143 mov rdx, [x+32]
144 mulx d10, rax, [x+40]
145 adcx d9, rax
146 mov eax, 0
147 adox d10, rax
148 adcx d10, rax
149
150// Again, just for a clear fresh start for the flags
151
152 xor eax, eax
153
154// Double and add to the 00 + 11 + 22 + 33 + 44 + 55 terms
155//
156// We could use shift-double but this seems tidier and in larger squarings
157// it was actually more efficient. I haven't experimented with this small
158// case to see how much that matters. Note: the writeback here is sprinkled
159// into the sequence in such a way that things still work if z = x, i.e. if
160// the output overwrites the input buffer and beyond.
161// The final "mov eax, 0" supplies a zero without disturbing CF/OF (rbp now holds d10)
162 mov rdx, [x]
163 mulx rdx, rax, rdx
164 mov [z], rax
165 adcx d1, d1
166 adox d1, rdx
167 mov rdx, [x+8]
168 mov [z+8], d1
169 mulx rdx, rax, rdx
170 adcx d2, d2
171 adox d2, rax
172 adcx d3, d3
173 adox d3, rdx
174 mov rdx, [x+16]
175 mov [z+16], d2
176 mulx rdx, rax, rdx
177 adcx d4, d4
178 adox d4, rax
179 adcx d5, d5
180 adox d5, rdx
181 mov rdx, [x+24]
182 mov [z+24], d3
183 mulx rdx, rax, rdx
184 adcx d6, d6
185 adox d6, rax
186 adcx d7, d7
187 adox d7, rdx
188 mov rdx, [x+32]
189 mov [z+32], d4
190 mulx rdx, rax, rdx
191 adcx d8, d8
192 adox d8, rax
193 adcx d9, d9
194 adox d9, rdx
195 mov rdx, [x+40]
196 mov [z+40], d5
197 mulx rdx, rax, rdx
198 mov [z+48], d6
199 adcx d10, d10
200 mov [z+56], d7
201 adox d10, rax
202 mov [z+64], d8
203 mov eax, 0
204 mov [z+72], d9
205 adcx rdx, rax
206 mov [z+80], d10
207 adox rdx, rax
208 mov [z+88], rdx
209
210// Restore saved registers and return
211
212 pop r15
213 pop r14
214 pop r13
215 pop r12
216 pop rbx
217 pop rbp
218
219#if WINDOWS_ABI
220 pop rsi
221 pop rdi
222#endif
223 ret
224
225#if defined(__linux__) && defined(__ELF__)
226.section .note.GNU-stack,"",%progbits
227#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12_alt.S
new file mode 100644
index 0000000000..eb43b0a15b
--- /dev/null
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12_alt.S
@@ -0,0 +1,210 @@
1// $OpenBSD: bignum_sqr_6_12_alt.S,v 1.4 2025/08/12 10:23:40 jsing Exp $
2//
3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4//
5// Permission to use, copy, modify, and/or distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17// ----------------------------------------------------------------------------
18// Square, z := x^2
19// Input x[6]; output z[12]
20//
21// extern void bignum_sqr_6_12_alt(uint64_t z[static 12],
22// const uint64_t x[static 6]);
23//
24// Standard x86-64 ABI: RDI = z, RSI = x
25// Microsoft x64 ABI: RCX = z, RDX = x
26// ----------------------------------------------------------------------------
27
28#include "s2n_bignum_internal.h"
29
30 .intel_syntax noprefix
31 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_6_12_alt)
32 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_6_12_alt)
33 .text
34
35// Input arguments
36
37#define z rdi
38#define x rsi
39
40// Other variables used as a rotating 3-word window to add terms to
41
42#define t0 r8
43#define t1 r9
44#define t2 r10
45
46// Additional temporaries for local windows to share doublings
47
48#define u0 rcx
49#define u1 r11
50
51// Macro for the key "multiply and add to (c,h,l)" step
// Computes (c,h,l) += numa * numb on the 3-word accumulator (c,h,l), where
// l is the least significant word. numb is used as a 64-bit memory operand
// of mul. Clobbers rax, rdx and the flags.
52
53#define combadd(c,h,l,numa,numb) \
54 mov rax, numa; \
55 mul QWORD PTR numb; \
56 add l, rax; \
57 adc h, rdx; \
58 adc c, 0
59
60// Set up initial window (c,h,l) = numa * numb
// The previous contents of c, h and l are discarded: c is zeroed and (h,l)
// receives the 128-bit product. c is cleared after the mul, so it must not
// be rax or rdx. Clobbers rax, rdx and the flags.
61
62#define combaddz(c,h,l,numa,numb) \
63 mov rax, numa; \
64 mul QWORD PTR numb; \
65 xor c, c; \
66 mov l, rax; \
67 mov h, rdx
68
69// Doubling step (c,h,l) = 2 * (c,hh,ll) + (0,h,l)
// On entry c is the top word of the 3-word value (c,hh,ll). hh and ll are
// doubled in place (so both are clobbered), the doubled pair is added to
// (h,l), and the carry out of that addition is folded into c.
70
71#define doubladd(c,h,l,hh,ll) \
72 add ll, ll; \
73 adc hh, hh; \
74 adc c, c; \
75 add l, ll; \
76 adc h, hh; \
77 adc c, 0
78
79// Square term incorporation (c,h,l) += numa^2
// The single-operand mul squares rax into rdx:rax, so numa is loaded only
// once. Clobbers rax, rdx and the flags.
80
81#define combadd1(c,h,l,numa) \
82 mov rax, numa; \
83 mul rax; \
84 add l, rax; \
85 adc h, rdx; \
86 adc c, 0
87
88// A short form where we don't expect a top carry
// Computes (h,l) += numa^2 on a 2-word accumulator; a carry out of h is
// not recorded anywhere. Clobbers rax, rdx and the flags.
89
90#define combads(h,l,numa) \
91 mov rax, numa; \
92 mul rax; \
93 add l, rax; \
94 adc h, rdx
95
96// A version doubling directly before adding, for single non-square terms
// Computes (c,h,l) += 2 * numa * numb. The product rdx:rax is doubled in
// place; the bit shifted out of rdx is added to c first, then the doubled
// product is accumulated into (c,h,l). Clobbers rax, rdx and the flags.
97
98#define combadd2(c,h,l,numa,numb) \
99 mov rax, numa; \
100 mul QWORD PTR numb; \
101 add rax, rax; \
102 adc rdx, rdx; \
103 adc c, 0; \
104 add l, rax; \
105 adc h, rdx; \
106 adc c, 0
107
108S2N_BN_SYMBOL(bignum_sqr_6_12_alt):
// Computes the 12-word square z = x^2 of the 6-word input x, one result
// word at a time. For result index k the products x[i] * x[j] with
// i + j = k are accumulated into the rotating 3-word window (t0,t1,t2),
// cross terms (i != j) counted twice, and the low window word is then
// stored to z[k]. Uses only rax, rdx, rcx and r8-r11 besides z and x.
// Result words are stored while x is still being read (z[0] is written
// before x[0] is reloaded for term 1), so this is not safe when z aliases x.
109 _CET_ENDBR
110
111#if WINDOWS_ABI
112 push rdi // rdi/rsi hold z/x below; saved here and restored before ret
113 push rsi
114 mov rdi, rcx // Microsoft x64 ABI passes z in rcx
115 mov rsi, rdx // and x in rdx
116#endif
117
118// Result term 0: x[0]^2 (low word to z[0], high word seeds t0)
119
120 mov rax, [x]
121 mul rax // rdx:rax = x[0]^2
122
123 mov [z], rax
124 mov t0, rdx
125 xor t1, t1
126
127// Result term 1: 2 * x[0] * x[1]
128
129 xor t2, t2 // fresh top word for the window (t2,t1,t0)
130 combadd2(t2,t1,t0,[x],[x+8])
131 mov [z+8], t0
132
133// Result term 2: x[1]^2 + 2 * x[0] * x[2]
134
135 xor t0, t0 // t0 was just stored; reuse it as the new top word
136 combadd1(t0,t2,t1,[x+8])
137 combadd2(t0,t2,t1,[x],[x+16])
138 mov [z+16], t1
139
140// Result term 3: 2 * (x[0] * x[3] + x[1] * x[2])
// From here on, terms with several cross products sum them first in the
// local window (c,u1,u0) and double that sum once with doubladd, where c is
// the main-window register that was just stored and is free for reuse.
141
142 combaddz(t1,u1,u0,[x],[x+24])
143 combadd(t1,u1,u0,[x+8],[x+16])
144 doubladd(t1,t0,t2,u1,u0)
145 mov [z+24], t2
146
147// Result term 4: 2 * (x[0] * x[4] + x[1] * x[3]) + x[2]^2
148
149 combaddz(t2,u1,u0,[x],[x+32])
150 combadd(t2,u1,u0,[x+8],[x+24])
151 doubladd(t2,t1,t0,u1,u0)
152 combadd1(t2,t1,t0,[x+16])
153 mov [z+32], t0
154
155// Result term 5: 2 * (x[0] * x[5] + x[1] * x[4] + x[2] * x[3])
156
157 combaddz(t0,u1,u0,[x],[x+40])
158 combadd(t0,u1,u0,[x+8],[x+32])
159 combadd(t0,u1,u0,[x+16],[x+24])
160 doubladd(t0,t2,t1,u1,u0)
161 mov [z+40], t1
162
163// Result term 6: 2 * (x[1] * x[5] + x[2] * x[4]) + x[3]^2
164
165 combaddz(t1,u1,u0,[x+8],[x+40])
166 combadd(t1,u1,u0,[x+16],[x+32])
167 doubladd(t1,t0,t2,u1,u0)
168 combadd1(t1,t0,t2,[x+24])
169 mov [z+48], t2
170
171// Result term 7: 2 * (x[2] * x[5] + x[3] * x[4])
172
173 combaddz(t2,u1,u0,[x+16],[x+40])
174 combadd(t2,u1,u0,[x+24],[x+32])
175 doubladd(t2,t1,t0,u1,u0)
176 mov [z+56], t0
177
178// Result term 8: 2 * x[3] * x[5] + x[4]^2
179
180 xor t0, t0
181 combadd2(t0,t2,t1,[x+24],[x+40])
182 combadd1(t0,t2,t1,[x+32])
183 mov [z+64], t1
184
185// Result term 9: 2 * x[4] * x[5]
186
187 xor t1, t1
188 combadd2(t1,t0,t2,[x+32],[x+40])
189 mov [z+72], t2
190
191// Result term 10: x[5]^2, using the short form that records no top carry
192
193 combads(t1,t0,[x+40])
194 mov [z+80], t0
195
196// Result term 11: the remaining top word of the window
197
198 mov [z+88], t1
199
200// Return
201
202#if WINDOWS_ABI
203 pop rsi
204 pop rdi
205#endif
206 ret
207
208#if defined(__linux__) && defined(__ELF__)
209.section .note.GNU-stack,"",%progbits
210#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16.S
new file mode 100644
index 0000000000..41277b5b6a
--- /dev/null
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16.S
@@ -0,0 +1,311 @@
1// $OpenBSD: bignum_sqr_8_16.S,v 1.4 2025/08/12 10:23:40 jsing Exp $
2//
3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4//
5// Permission to use, copy, modify, and/or distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17// ----------------------------------------------------------------------------
18// Square, z := x^2
19// Input x[8]; output z[16]
20//
21// extern void bignum_sqr_8_16(uint64_t z[static 16], const uint64_t x[static 8]);
22//
23// Standard x86-64 ABI: RDI = z, RSI = x
24// Microsoft x64 ABI: RCX = z, RDX = x
25// ----------------------------------------------------------------------------
26
27#include "s2n_bignum_internal.h"
28
29 .intel_syntax noprefix
30 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_8_16)
31 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_8_16)
32 .text
33
34// Input arguments, in their standard x86-64 ABI registers (RDI = z, RSI = x)
35
36#define z rdi
37#define x rsi
38
39// A zero register
40
41#define zero rbp
42#define zeroe ebp
43
44// mulpadd i, j adds rdx * x[i] into the window at the i+j point
// The window is the ring of registers r8..r15: word position p lives in
// register r8 + (p % 8), and position 7 wraps round to r8 for its high word.
// The multiplier must already be in rdx; arg2 (j) is used only to select
// the window position. The low product word is added with adcx (CF chain)
// and the high word is added to the next register with adox (OF chain), so
// two independent carry chains run through a sequence of these macros.
// Clobbers rax and rcx.
45
46.macro mulpadd arg1,arg2
47 mulx rcx, rax, [x+8*\arg1] // rcx:rax = rdx * x[arg1]; mulx does not touch flags
48.if ((\arg1 + \arg2) % 8 == 0)
49 adcx r8, rax
50 adox r9, rcx
51.elseif ((\arg1 + \arg2) % 8 == 1)
52 adcx r9, rax
53 adox r10, rcx
54.elseif ((\arg1 + \arg2) % 8 == 2)
55 adcx r10, rax
56 adox r11, rcx
57.elseif ((\arg1 + \arg2) % 8 == 3)
58 adcx r11, rax
59 adox r12, rcx
60.elseif ((\arg1 + \arg2) % 8 == 4)
61 adcx r12, rax
62 adox r13, rcx
63.elseif ((\arg1 + \arg2) % 8 == 5)
64 adcx r13, rax
65 adox r14, rcx
66.elseif ((\arg1 + \arg2) % 8 == 6)
67 adcx r14, rax
68 adox r15, rcx
69.elseif ((\arg1 + \arg2) % 8 == 7)
70 adcx r15, rax
71 adox r8, rcx
72.endif
73
74.endm
75
76// mulpade i, j adds rdx * x[i] into the window at i+j
77// but re-creates the top word assuming nothing to add there
// Same register ring as mulpadd (position p lives in r8 + (p % 8)), but
// mulx writes the high product word straight into the next window register,
// overwriting whatever it held. The following adox adds the zero register,
// so only the pending OF-chain carry is absorbed into that new top word.
// Requires the zero register (rbp) to hold 0 and the multiplier to be in
// rdx. Clobbers rax.
78
79.macro mulpade arg1,arg2
80.if ((\arg1 + \arg2) % 8 == 0)
81 mulx r9, rax, [x+8*\arg1]
82 adcx r8, rax
83 adox r9, zero
84.elseif ((\arg1 + \arg2) % 8 == 1)
85 mulx r10, rax, [x+8*\arg1]
86 adcx r9, rax
87 adox r10, zero
88.elseif ((\arg1 + \arg2) % 8 == 2)
89 mulx r11, rax, [x+8*\arg1]
90 adcx r10, rax
91 adox r11, zero
92.elseif ((\arg1 + \arg2) % 8 == 3)
93 mulx r12, rax, [x+8*\arg1]
94 adcx r11, rax
95 adox r12, zero
96.elseif ((\arg1 + \arg2) % 8 == 4)
97 mulx r13, rax, [x+8*\arg1]
98 adcx r12, rax
99 adox r13, zero
100.elseif ((\arg1 + \arg2) % 8 == 5)
101 mulx r14, rax, [x+8*\arg1]
102 adcx r13, rax
103 adox r14, zero
104.elseif ((\arg1 + \arg2) % 8 == 6)
105 mulx r15, rax, [x+8*\arg1]
106 adcx r14, rax
107 adox r15, zero
108.elseif ((\arg1 + \arg2) % 8 == 7)
109 mulx r8, rax, [x+8*\arg1]
110 adcx r15, rax
111 adox r8, zero
112.endif
113
114.endm
115
116.macro diagonals
// Whole body of the 8-word squaring z[0..15] = x[0..7]^2.
//
// Four passes first accumulate every off-diagonal product x[i] * x[j]
// (written "ij" in the comments below, i > j) exactly once. During those
// passes result words 1..8 are stored to z[1]..z[8]; words 9..14 are left
// in r9..r14. The last section then doubles that sum word by word and adds
// the squares x[i]^2, writing the final z[0]..z[15].
//
// Uses rax, rcx, rdx, r8-r15 and rbp (the zero register). Words of z are
// stored before all of x has been read (z[1] is written before x[1] is
// loaded as a multiplier), so this is not safe when z aliases x.
117
118 xor zeroe, zeroe // zero = 0; also clears CF and OF for the carry chains
119
120// Set initial window [r11..r15,r8] + 2 wb (z[1],z[2]) = 10 + 20 + 30 + 40 + 50 + 60 + 70
121
122 mov rdx, [x]
123 mulx rax, r9, [x+8]
124 mov [z+8], r9
125 mulx rcx, r10, [x+16]
126 adcx r10, rax
127 mov [z+16], r10
128 mulx rax, r11, [x+24]
129 adcx r11, rcx
130 mulx rcx, r12, [x+32]
131 adcx r12, rax
132 mulx rax, r13, [x+40]
133 adcx r13, rcx
134 mulx rcx, r14, [x+48]
135 adcx r14, rax
136 mulx r8, r15, [x+56]
137 adcx r15, rcx
138 adcx r8, zero // top word (position 8) = high(70) + final carry
139
140// Add in the next diagonal = 21 + 31 + 41 + 51 + 61 + 71 + 54
141
142 xor zeroe, zeroe
143 mov rdx, [x+8]
144 mulpadd 2, 1
145 mov [z+24], r11
146 mulpadd 3, 1
147 mov [z+32], r12
148 mulpadd 4, 1
149 mulpadd 5, 1
150 mulpadd 6, 1
151 mulpade 7, 1
152 mov rdx, [x+32]
153 mulpade 5, 4
154 adcx r10, zero // fold the remaining CF carry into the new top word
155
156// And the next one = 32 + 42 + 52 + 62 + 72 + 64 + 65
157
158 xor zeroe, zeroe
159 mov rdx, [x+16]
160 mulpadd 3, 2
161 mov [z+40], r13
162 mulpadd 4, 2
163 mov [z+48], r14
164 mulpadd 5, 2
165 mulpadd 6, 2
166 mulpadd 7, 2
167 mov rdx, [x+48]
168 mulpade 4, 6
169 mulpade 5, 6
170 adcx r12, zero // fold the remaining CF carry into the new top word
171
172// And the final one = 43 + 53 + 63 + 73 + 74 + 75 + 76
173
174 xor zeroe, zeroe
175 mov rdx, [x+24]
176 mulpadd 4, 3
177 mov [z+56], r15
178 mulpadd 5, 3
179 mov [z+64], r8
180 mulpadd 6, 3
181 mulpadd 7, 3
182 mov rdx, [x+56]
183 mulpadd 4, 7
184 mulpade 5, 7
185 mulpade 6, 7
186 adcx r14, zero // fold the remaining CF carry into the new top word
187
188// Double and add things; use z[1]..z[8] and thereafter the registers
189// r9..r15 which haven't been written back yet
// Each word is doubled with adcx (carry between words on the CF chain) and
// has the matching word of a square x[i]^2 added with adox (OF chain).
190
191 xor zeroe, zeroe
192 mov rdx, [x]
193 mulx rcx, rax, rdx // rcx:rax = x[0]^2
194 mov [z], rax
195 mov rax, [z+8]
196 adcx rax, rax
197 adox rax, rcx
198 mov [z+8], rax
199
200 mov rax, [z+16]
201 mov rdx, [x+8]
202 mulx rcx, rdx, rdx // rcx:rdx = x[1]^2
203 adcx rax, rax
204 adox rax, rdx
205 mov [z+16], rax
206 mov rax, [z+24]
207 adcx rax, rax
208 adox rax, rcx
209 mov [z+24], rax
210
211 mov rax, [z+32]
212 mov rdx, [x+16]
213 mulx rcx, rdx, rdx // rcx:rdx = x[2]^2
214 adcx rax, rax
215 adox rax, rdx
216 mov [z+32], rax
217 mov rax, [z+40]
218 adcx rax, rax
219 adox rax, rcx
220 mov [z+40], rax
221
222 mov rax, [z+48]
223 mov rdx, [x+24]
224 mulx rcx, rdx, rdx // rcx:rdx = x[3]^2
225 adcx rax, rax
226 adox rax, rdx
227 mov [z+48], rax
228 mov rax, [z+56]
229 adcx rax, rax
230 adox rax, rcx
231 mov [z+56], rax
232
233 mov rax, [z+64]
234 mov rdx, [x+32]
235 mulx rcx, rdx, rdx // rcx:rdx = x[4]^2
236 adcx rax, rax
237 adox rax, rdx
238 mov [z+64], rax
239 adcx r9, r9 // from word 9 on the sum is still in registers
240 adox r9, rcx
241 mov [z+72], r9
242
243 mov rdx, [x+40]
244 mulx rcx, rdx, rdx // rcx:rdx = x[5]^2
245 adcx r10, r10
246 adox r10, rdx
247 mov [z+80], r10
248 adcx r11, r11
249 adox r11, rcx
250 mov [z+88], r11
251
252 mov rdx, [x+48]
253 mulx rcx, rdx, rdx // rcx:rdx = x[6]^2
254 adcx r12, r12
255 adox r12, rdx
256 mov [z+96], r12
257 adcx r13, r13
258 adox r13, rcx
259 mov [z+104], r13
260
261 mov rdx, [x+56]
262 mulx r15, rdx, rdx // r15:rdx = x[7]^2; r15 becomes the top result word
263 adcx r14, r14
264 adox r14, rdx
265 mov [z+112], r14
266 adcx r15, zero // absorb the outstanding carry of the doubling chain
267 adox r15, zero // and of the square-adding chain
268 mov [z+120], r15
269
270.endm
271
272
273S2N_BN_SYMBOL(bignum_sqr_8_16):
// Entry point for z[0..15] = x[0..7]^2. This wrapper only adapts the
// calling convention and preserves the callee-saved registers that the
// diagonals macro uses; all of the arithmetic is in that macro.
274 _CET_ENDBR
275
276#if WINDOWS_ABI
277 push rdi // rdi/rsi hold z/x below; saved here and restored before ret
278 push rsi
279 mov rdi, rcx // Microsoft x64 ABI passes z in rcx
280 mov rsi, rdx // and x in rdx
281#endif
282
283// Save more registers to play with
// rbp serves as the zero register and r12-r15 are part of the r8..r15
// accumulation window used by the macros above.
284
285 push rbp
286 push r12
287 push r13
288 push r14
289 push r15
290
291// Do the multiplication
292
293 diagonals
294
295// Real epilog
// Pops mirror the pushes above in reverse order.
296
297 pop r15
298 pop r14
299 pop r13
300 pop r12
301 pop rbp
302
303#if WINDOWS_ABI
304 pop rsi
305 pop rdi
306#endif
307 ret
308
309#if defined(__linux__) && defined(__ELF__)
310.section .note.GNU-stack,"",%progbits
311#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
index ac0b6f96c2..cb10ba2a12 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
@@ -1,3 +1,5 @@
1// $OpenBSD: bignum_sqr_8_16_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,7 +18,8 @@
16// Square, z := x^2 18// Square, z := x^2
17// Input x[8]; output z[16] 19// Input x[8]; output z[16]
18// 20//
19// extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); 21// extern void bignum_sqr_8_16_alt(uint64_t z[static 16],
22// const uint64_t x[static 8]);
20// 23//
21// Standard x86-64 ABI: RDI = z, RSI = x 24// Standard x86-64 ABI: RDI = z, RSI = x
22// Microsoft x64 ABI: RCX = z, RDX = x 25// Microsoft x64 ABI: RCX = z, RDX = x
@@ -103,7 +106,7 @@
103 adc c, 0 106 adc c, 0
104 107
105S2N_BN_SYMBOL(bignum_sqr_8_16_alt): 108S2N_BN_SYMBOL(bignum_sqr_8_16_alt):
106 _CET_ENDBR 109 _CET_ENDBR
107 110
108#if WINDOWS_ABI 111#if WINDOWS_ABI
109 push rdi 112 push rdi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
index 3ff8a30510..7324d3a71e 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
@@ -1,3 +1,5 @@
1// $OpenBSD: bignum_sub.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,9 +18,8 @@
16// Subtract, z := x - y 18// Subtract, z := x - y
17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p] 19// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18// 20//
19// extern uint64_t bignum_sub 21// extern uint64_t bignum_sub(uint64_t p, uint64_t *z, uint64_t m,
20// (uint64_t p, uint64_t *z, 22// const uint64_t *x, uint64_t n, const uint64_t *y);
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22// 23//
23// Does the z := x - y operation, truncating modulo p words in general and 24// Does the z := x - y operation, truncating modulo p words in general and
24// returning a top borrow (0 or 1) in the p'th place, only subtracting input 25// returning a top borrow (0 or 1) in the p'th place, only subtracting input
@@ -49,7 +50,7 @@
49 50
50 51
51S2N_BN_SYMBOL(bignum_sub): 52S2N_BN_SYMBOL(bignum_sub):
52 _CET_ENDBR 53 _CET_ENDBR
53 54
54#if WINDOWS_ABI 55#if WINDOWS_ABI
55 push rdi 56 push rdi
@@ -75,7 +76,7 @@ S2N_BN_SYMBOL(bignum_sub):
75 cmp p, n 76 cmp p, n
76 cmovc n, p 77 cmovc n, p
77 cmp m, n 78 cmp m, n
78 jc ylonger 79 jc bignum_sub_ylonger
79 80
80// The case where x is longer or of the same size (p >= m >= n) 81// The case where x is longer or of the same size (p >= m >= n)
81 82
@@ -83,32 +84,32 @@ S2N_BN_SYMBOL(bignum_sub):
83 sub m, n 84 sub m, n
84 inc m 85 inc m
85 test n, n 86 test n, n
86 jz xtest 87 jz bignum_sub_xtest
87xmainloop: 88bignum_sub_xmainloop:
88 mov a, [x+8*i] 89 mov a, [x+8*i]
89 sbb a, [y+8*i] 90 sbb a, [y+8*i]
90 mov [z+8*i],a 91 mov [z+8*i],a
91 inc i 92 inc i
92 dec n 93 dec n
93 jnz xmainloop 94 jnz bignum_sub_xmainloop
94 jmp xtest 95 jmp bignum_sub_xtest
95xtoploop: 96bignum_sub_xtoploop:
96 mov a, [x+8*i] 97 mov a, [x+8*i]
97 sbb a, 0 98 sbb a, 0
98 mov [z+8*i],a 99 mov [z+8*i],a
99 inc i 100 inc i
100xtest: 101bignum_sub_xtest:
101 dec m 102 dec m
102 jnz xtoploop 103 jnz bignum_sub_xtoploop
103 sbb a, a 104 sbb a, a
104 test p, p 105 test p, p
105 jz tailskip 106 jz bignum_sub_tailskip
106tailloop: 107bignum_sub_tailloop:
107 mov [z+8*i],a 108 mov [z+8*i],a
108 inc i 109 inc i
109 dec p 110 dec p
110 jnz tailloop 111 jnz bignum_sub_tailloop
111tailskip: 112bignum_sub_tailskip:
112 neg a 113 neg a
113#if WINDOWS_ABI 114#if WINDOWS_ABI
114 pop rsi 115 pop rsi
@@ -118,29 +119,29 @@ tailskip:
118 119
119// The case where y is longer (p >= n > m) 120// The case where y is longer (p >= n > m)
120 121
121ylonger: 122bignum_sub_ylonger:
122 123
123 sub p, n 124 sub p, n
124 sub n, m 125 sub n, m
125 test m, m 126 test m, m
126 jz ytoploop 127 jz bignum_sub_ytoploop
127ymainloop: 128bignum_sub_ymainloop:
128 mov a, [x+8*i] 129 mov a, [x+8*i]
129 sbb a, [y+8*i] 130 sbb a, [y+8*i]
130 mov [z+8*i],a 131 mov [z+8*i],a
131 inc i 132 inc i
132 dec m 133 dec m
133 jnz ymainloop 134 jnz bignum_sub_ymainloop
134ytoploop: 135bignum_sub_ytoploop:
135 mov ashort, 0 136 mov ashort, 0
136 sbb a, [y+8*i] 137 sbb a, [y+8*i]
137 mov [z+8*i],a 138 mov [z+8*i],a
138 inc i 139 inc i
139 dec n 140 dec n
140 jnz ytoploop 141 jnz bignum_sub_ytoploop
141 sbb a, a 142 sbb a, a
142 test p, p 143 test p, p
143 jnz tailloop 144 jnz bignum_sub_tailloop
144 neg a 145 neg a
145#if WINDOWS_ABI 146#if WINDOWS_ABI
146 pop rsi 147 pop rsi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c b/src/lib/libcrypto/bn/arch/amd64/bn_arch.c
index a377a05681..6c3888687b 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c
+++ b/src/lib/libcrypto/bn/arch/amd64/bn_arch.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.c,v 1.7 2023/06/24 16:01:44 jsing Exp $ */ 1/* $OpenBSD: bn_arch.c,v 1.17 2025/09/01 15:33:23 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -19,6 +19,7 @@
19 19
20#include "bn_arch.h" 20#include "bn_arch.h"
21#include "bn_local.h" 21#include "bn_local.h"
22#include "crypto_arch.h"
22#include "s2n_bignum.h" 23#include "s2n_bignum.h"
23 24
24#ifdef HAVE_BN_ADD 25#ifdef HAVE_BN_ADD
@@ -26,8 +27,8 @@ BN_ULONG
26bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, 27bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
27 int b_len) 28 int b_len)
28{ 29{
29 return bignum_add(r_len, (uint64_t *)r, a_len, (uint64_t *)a, 30 return bignum_add(r_len, (uint64_t *)r, a_len, (const uint64_t *)a,
30 b_len, (uint64_t *)b); 31 b_len, (const uint64_t *)b);
31} 32}
32#endif 33#endif
33 34
@@ -36,8 +37,8 @@ bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
36BN_ULONG 37BN_ULONG
37bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) 38bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
38{ 39{
39 return bignum_add(n, (uint64_t *)rd, n, (uint64_t *)ad, n, 40 return bignum_add(n, (uint64_t *)rd, n, (const uint64_t *)ad, n,
40 (uint64_t *)bd); 41 (const uint64_t *)bd);
41} 42}
42#endif 43#endif
43 44
@@ -46,8 +47,8 @@ BN_ULONG
46bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, 47bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
47 int b_len) 48 int b_len)
48{ 49{
49 return bignum_sub(r_len, (uint64_t *)r, a_len, (uint64_t *)a, 50 return bignum_sub(r_len, (uint64_t *)r, a_len, (const uint64_t *)a,
50 b_len, (uint64_t *)b); 51 b_len, (const uint64_t *)b);
51} 52}
52#endif 53#endif
53 54
@@ -55,52 +56,99 @@ bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
55BN_ULONG 56BN_ULONG
56bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) 57bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
57{ 58{
58 return bignum_sub(n, (uint64_t *)rd, n, (uint64_t *)ad, n, 59 return bignum_sub(n, (uint64_t *)rd, n, (const uint64_t *)ad, n,
59 (uint64_t *)bd); 60 (const uint64_t *)bd);
60} 61}
61#endif 62#endif
62 63
63#ifdef HAVE_BN_MUL_ADD_WORDS 64#ifdef HAVE_BN_MOD_ADD_WORDS
64BN_ULONG 65void
65bn_mul_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) 66bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
67 const BN_ULONG *m, size_t n)
66{ 68{
67 return bignum_cmadd(num, (uint64_t *)rd, w, num, (uint64_t *)ad); 69 bignum_modadd(n, (uint64_t *)r, (const uint64_t *)a,
70 (const uint64_t *)b, (const uint64_t *)m);
68} 71}
69#endif 72#endif
70 73
71#ifdef HAVE_BN_MUL_WORDS 74#ifdef HAVE_BN_MOD_SUB_WORDS
72BN_ULONG 75void
73bn_mul_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) 76bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
77 const BN_ULONG *m, size_t n)
74{ 78{
75 return bignum_cmul(num, (uint64_t *)rd, w, num, (uint64_t *)ad); 79 bignum_modsub(n, (uint64_t *)r, (const uint64_t *)a,
80 (const uint64_t *)b, (const uint64_t *)m);
76} 81}
77#endif 82#endif
78 83
79#ifdef HAVE_BN_MUL_COMBA4 84#ifdef HAVE_BN_MUL_COMBA4
80void 85void
81bn_mul_comba4(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd) 86bn_mul_comba4(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd)
87{
88 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) {
89 bignum_mul_4_8((uint64_t *)rd, (const uint64_t *)ad,
90 (const uint64_t *)bd);
91 return;
92 }
93
94 bignum_mul_4_8_alt((uint64_t *)rd, (const uint64_t *)ad,
95 (const uint64_t *)bd);
96}
97#endif
98
99#ifdef HAVE_BN_MUL_COMBA6
100void
101bn_mul_comba6(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd)
82{ 102{
83 /* XXX - consider using non-alt on CPUs that have the ADX extension. */ 103 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) {
84 bignum_mul_4_8_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd); 104 bignum_mul_6_12((uint64_t *)rd, (const uint64_t *)ad,
105 (const uint64_t *)bd);
106 return;
107 }
108
109 bignum_mul_6_12_alt((uint64_t *)rd, (const uint64_t *)ad,
110 (const uint64_t *)bd);
85} 111}
86#endif 112#endif
87 113
88#ifdef HAVE_BN_MUL_COMBA8 114#ifdef HAVE_BN_MUL_COMBA8
89void 115void
90bn_mul_comba8(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd) 116bn_mul_comba8(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd)
91{ 117{
92 /* XXX - consider using non-alt on CPUs that have the ADX extension. */ 118 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) {
93 bignum_mul_8_16_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd); 119 bignum_mul_8_16((uint64_t *)rd, (const uint64_t *)ad,
120 (const uint64_t *)bd);
121 return;
122 }
123
124 bignum_mul_8_16_alt((uint64_t *)rd, (const uint64_t *)ad,
125 (const uint64_t *)bd);
94} 126}
95#endif 127#endif
96 128
97#ifdef HAVE_BN_SQR 129#ifdef HAVE_BN_MUL_WORDS
98int 130void
99bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) 131bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int a_len, const BN_ULONG *b,
132 int b_len)
133{
134 bignum_mul(a_len + b_len, (uint64_t *)r, a_len, (const uint64_t *)a,
135 b_len, (const uint64_t *)b);
136}
137#endif
138
139#ifdef HAVE_BN_MULW_ADD_WORDS
140BN_ULONG
141bn_mulw_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
100{ 142{
101 bignum_sqr(r_len, (uint64_t *)r->d, a->top, (uint64_t *)a->d); 143 return bignum_cmadd(num, (uint64_t *)rd, w, num, (const uint64_t *)ad);
144}
145#endif
102 146
103 return 1; 147#ifdef HAVE_BN_MULW_WORDS
148BN_ULONG
149bn_mulw_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
150{
151 return bignum_cmul(num, (uint64_t *)rd, w, num, (const uint64_t *)ad);
104} 152}
105#endif 153#endif
106 154
@@ -108,8 +156,25 @@ bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx)
108void 156void
109bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad) 157bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad)
110{ 158{
111 /* XXX - consider using non-alt on CPUs that have the ADX extension. */ 159 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) {
112 bignum_sqr_4_8_alt((uint64_t *)rd, (uint64_t *)ad); 160 bignum_sqr_4_8((uint64_t *)rd, (const uint64_t *)ad);
161 return;
162 }
163
164 bignum_sqr_4_8_alt((uint64_t *)rd, (const uint64_t *)ad);
165}
166#endif
167
168#ifdef HAVE_BN_SQR_COMBA6
169void
170bn_sqr_comba6(BN_ULONG *rd, const BN_ULONG *ad)
171{
172 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) {
173 bignum_sqr_6_12((uint64_t *)rd, (const uint64_t *)ad);
174 return;
175 }
176
177 bignum_sqr_6_12_alt((uint64_t *)rd, (const uint64_t *)ad);
113} 178}
114#endif 179#endif
115 180
@@ -117,8 +182,20 @@ bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad)
117void 182void
118bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad) 183bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad)
119{ 184{
120 /* XXX - consider using non-alt on CPUs that have the ADX extension. */ 185 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) {
121 bignum_sqr_8_16_alt((uint64_t *)rd, (uint64_t *)ad); 186 bignum_sqr_8_16((uint64_t *)rd, (const uint64_t *)ad);
187 return;
188 }
189
190 bignum_sqr_8_16_alt((uint64_t *)rd, (const uint64_t *)ad);
191}
192#endif
193
194#ifdef HAVE_BN_SQR_WORDS
195void
196bn_sqr_words(BN_ULONG *rd, const BN_ULONG *ad, int a_len)
197{
198 bignum_sqr(a_len * 2, (uint64_t *)rd, a_len, (const uint64_t *)ad);
122} 199}
123#endif 200#endif
124 201
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
index 927cd75208..3cb1d1d274 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
+++ b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.h,v 1.14 2024/03/26 06:09:25 jsing Exp $ */ 1/* $OpenBSD: bn_arch.h,v 1.19 2025/09/01 15:15:44 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -27,14 +27,20 @@
27 27
28#define HAVE_BN_DIV_WORDS 28#define HAVE_BN_DIV_WORDS
29 29
30#define HAVE_BN_MUL_ADD_WORDS 30#define HAVE_BN_MOD_ADD_WORDS
31#define HAVE_BN_MOD_SUB_WORDS
32
31#define HAVE_BN_MUL_COMBA4 33#define HAVE_BN_MUL_COMBA4
34#define HAVE_BN_MUL_COMBA6
32#define HAVE_BN_MUL_COMBA8 35#define HAVE_BN_MUL_COMBA8
33#define HAVE_BN_MUL_WORDS 36#define HAVE_BN_MUL_WORDS
37#define HAVE_BN_MULW_ADD_WORDS
38#define HAVE_BN_MULW_WORDS
34 39
35#define HAVE_BN_SQR
36#define HAVE_BN_SQR_COMBA4 40#define HAVE_BN_SQR_COMBA4
41#define HAVE_BN_SQR_COMBA6
37#define HAVE_BN_SQR_COMBA8 42#define HAVE_BN_SQR_COMBA8
43#define HAVE_BN_SQR_WORDS
38 44
39#define HAVE_BN_SUB 45#define HAVE_BN_SUB
40#define HAVE_BN_SUB_WORDS 46#define HAVE_BN_SUB_WORDS
diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S
index 3926fcd4b0..705fbdbbda 100644
--- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S
+++ b/src/lib/libcrypto/bn/arch/amd64/word_clz.S
@@ -1,3 +1,5 @@
1// $OpenBSD: word_clz.S,v 1.7 2025/08/11 14:13:56 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -16,7 +18,7 @@
16// Count leading zero bits in a single word 18// Count leading zero bits in a single word
17// Input a; output function return 19// Input a; output function return
18// 20//
19// extern uint64_t word_clz (uint64_t a); 21// extern uint64_t word_clz(uint64_t a);
20// 22//
21// Standard x86-64 ABI: RDI = a, returns RAX 23// Standard x86-64 ABI: RDI = a, returns RAX
22// Microsoft x64 ABI: RCX = a, returns RAX 24// Microsoft x64 ABI: RCX = a, returns RAX
@@ -30,7 +32,7 @@
30 .text 32 .text
31 33
32S2N_BN_SYMBOL(word_clz): 34S2N_BN_SYMBOL(word_clz):
33 _CET_ENDBR 35 _CET_ENDBR
34 36
35#if WINDOWS_ABI 37#if WINDOWS_ABI
36 push rdi 38 push rdi
diff --git a/src/lib/libcrypto/bn/arch/i386/bn_arch.h b/src/lib/libcrypto/bn/arch/i386/bn_arch.h
index eef519fcc7..288cbdeaa9 100644
--- a/src/lib/libcrypto/bn/arch/i386/bn_arch.h
+++ b/src/lib/libcrypto/bn/arch/i386/bn_arch.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.h,v 1.9 2023/02/16 10:41:03 jsing Exp $ */ 1/* $OpenBSD: bn_arch.h,v 1.11 2025/09/07 03:56:37 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -26,14 +26,13 @@
26 26
27#define HAVE_BN_DIV_WORDS 27#define HAVE_BN_DIV_WORDS
28 28
29#define HAVE_BN_MUL_ADD_WORDS
30#define HAVE_BN_MUL_COMBA4 29#define HAVE_BN_MUL_COMBA4
31#define HAVE_BN_MUL_COMBA8 30#define HAVE_BN_MUL_COMBA8
32#define HAVE_BN_MUL_WORDS 31#define HAVE_BN_MULW_ADD_WORDS
32#define HAVE_BN_MULW_WORDS
33 33
34#define HAVE_BN_SQR_COMBA4 34#define HAVE_BN_SQR_COMBA4
35#define HAVE_BN_SQR_COMBA8 35#define HAVE_BN_SQR_COMBA8
36#define HAVE_BN_SQR_WORDS
37 36
38#define HAVE_BN_SUB_WORDS 37#define HAVE_BN_SUB_WORDS
39 38
diff --git a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h
index 53771bce1e..562a398f33 100644
--- a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h
+++ b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.h,v 1.7 2023/01/23 12:17:58 jsing Exp $ */ 1/* $OpenBSD: bn_arch.h,v 1.9 2025/09/07 03:56:37 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -25,14 +25,13 @@
25#define HAVE_BN_DIV_WORDS 25#define HAVE_BN_DIV_WORDS
26#define HAVE_BN_DIV_3_WORDS 26#define HAVE_BN_DIV_3_WORDS
27 27
28#define HAVE_BN_MUL_ADD_WORDS
29#define HAVE_BN_MUL_COMBA4 28#define HAVE_BN_MUL_COMBA4
30#define HAVE_BN_MUL_COMBA8 29#define HAVE_BN_MUL_COMBA8
31#define HAVE_BN_MUL_WORDS 30#define HAVE_BN_MULW_ADD_WORDS
31#define HAVE_BN_MULW_WORDS
32 32
33#define HAVE_BN_SQR_COMBA4 33#define HAVE_BN_SQR_COMBA4
34#define HAVE_BN_SQR_COMBA8 34#define HAVE_BN_SQR_COMBA8
35#define HAVE_BN_SQR_WORDS
36 35
37#define HAVE_BN_SUB_WORDS 36#define HAVE_BN_SUB_WORDS
38 37
diff --git a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h
index 46e932a2d5..21bcdf48d3 100644
--- a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h
+++ b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.h,v 1.6 2023/01/23 12:17:58 jsing Exp $ */ 1/* $OpenBSD: bn_arch.h,v 1.8 2025/09/07 03:56:37 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -24,14 +24,13 @@
24 24
25#define HAVE_BN_DIV_WORDS 25#define HAVE_BN_DIV_WORDS
26 26
27#define HAVE_BN_MUL_ADD_WORDS
28#define HAVE_BN_MUL_COMBA4 27#define HAVE_BN_MUL_COMBA4
29#define HAVE_BN_MUL_COMBA8 28#define HAVE_BN_MUL_COMBA8
30#define HAVE_BN_MUL_WORDS 29#define HAVE_BN_MULW_ADD_WORDS
30#define HAVE_BN_MULW_WORDS
31 31
32#define HAVE_BN_SQR_COMBA4 32#define HAVE_BN_SQR_COMBA4
33#define HAVE_BN_SQR_COMBA8 33#define HAVE_BN_SQR_COMBA8
34#define HAVE_BN_SQR_WORDS
35 34
36#define HAVE_BN_SUB_WORDS 35#define HAVE_BN_SUB_WORDS
37 36
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
index 71b775af8d..9b4b11ad5b 100644
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ b/src/lib/libcrypto/bn/asm/bn-586.pl
@@ -6,21 +6,20 @@ require "x86asm.pl";
6 6
7&asm_init($ARGV[0],$0); 7&asm_init($ARGV[0],$0);
8 8
9$sse2=0; 9$sse2=1;
10for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
11 10
12&external_label("OPENSSL_ia32cap_P") if ($sse2); 11&external_label("OPENSSL_ia32cap_P") if ($sse2);
13 12
14&bn_mul_add_words("bn_mul_add_words"); 13&bn_mulw_add_words("bn_mulw_add_words");
15&bn_mul_words("bn_mul_words"); 14&bn_mulw_words("bn_mulw_words");
16&bn_sqr_words("bn_sqr_words"); 15&bn_sqr_word_wise("bn_sqr_word_wise");
17&bn_div_words("bn_div_words"); 16&bn_div_words("bn_div_words");
18&bn_add_words("bn_add_words"); 17&bn_add_words("bn_add_words");
19&bn_sub_words("bn_sub_words"); 18&bn_sub_words("bn_sub_words");
20 19
21&asm_finish(); 20&asm_finish();
22 21
23sub bn_mul_add_words 22sub bn_mulw_add_words
24 { 23 {
25 local($name)=@_; 24 local($name)=@_;
26 25
@@ -207,7 +206,7 @@ sub bn_mul_add_words
207 &function_end($name); 206 &function_end($name);
208 } 207 }
209 208
210sub bn_mul_words 209sub bn_mulw_words
211 { 210 {
212 local($name)=@_; 211 local($name)=@_;
213 212
@@ -319,7 +318,7 @@ sub bn_mul_words
319 &function_end($name); 318 &function_end($name);
320 } 319 }
321 320
322sub bn_sqr_words 321sub bn_sqr_word_wise
323 { 322 {
324 local($name)=@_; 323 local($name)=@_;
325 324
diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl
index 02d43e15b0..aaa0c5d8b0 100644
--- a/src/lib/libcrypto/bn/asm/mips.pl
+++ b/src/lib/libcrypto/bn/asm/mips.pl
@@ -110,19 +110,19 @@ $code.=<<___;
110.set noat 110.set noat
111 111
112.align 5 112.align 5
113.globl bn_mul_add_words 113.globl bn_mulw_add_words
114.ent bn_mul_add_words 114.ent bn_mulw_add_words
115bn_mul_add_words: 115bn_mulw_add_words:
116 .set noreorder 116 .set noreorder
117 bgtz $a2,bn_mul_add_words_internal 117 bgtz $a2,bn_mulw_add_words_internal
118 move $v0,$zero 118 move $v0,$zero
119 jr $ra 119 jr $ra
120 move $a0,$v0 120 move $a0,$v0
121.end bn_mul_add_words 121.end bn_mulw_add_words
122 122
123.align 5 123.align 5
124.ent bn_mul_add_words_internal 124.ent bn_mulw_add_words_internal
125bn_mul_add_words_internal: 125bn_mulw_add_words_internal:
126___ 126___
127$code.=<<___ if ($flavour =~ /nubi/i); 127$code.=<<___ if ($flavour =~ /nubi/i);
128 .frame $sp,6*$SZREG,$ra 128 .frame $sp,6*$SZREG,$ra
@@ -140,9 +140,9 @@ $code.=<<___;
140 .set reorder 140 .set reorder
141 li $minus4,-4 141 li $minus4,-4
142 and $ta0,$a2,$minus4 142 and $ta0,$a2,$minus4
143 beqz $ta0,.L_bn_mul_add_words_tail 143 beqz $ta0,.L_bn_mulw_add_words_tail
144 144
145.L_bn_mul_add_words_loop: 145.L_bn_mulw_add_words_loop:
146 $LD $t0,0($a1) 146 $LD $t0,0($a1)
147 $MULTU $t0,$a3 147 $MULTU $t0,$a3
148 $LD $t1,0($a0) 148 $LD $t1,0($a0)
@@ -201,13 +201,13 @@ $code.=<<___;
201 sltu $at,$ta3,$at 201 sltu $at,$ta3,$at
202 $ST $ta3,-$BNSZ($a0) 202 $ST $ta3,-$BNSZ($a0)
203 .set noreorder 203 .set noreorder
204 bgtz $ta0,.L_bn_mul_add_words_loop 204 bgtz $ta0,.L_bn_mulw_add_words_loop
205 $ADDU $v0,$at 205 $ADDU $v0,$at
206 206
207 beqz $a2,.L_bn_mul_add_words_return 207 beqz $a2,.L_bn_mulw_add_words_return
208 nop 208 nop
209 209
210.L_bn_mul_add_words_tail: 210.L_bn_mulw_add_words_tail:
211 .set reorder 211 .set reorder
212 $LD $t0,0($a1) 212 $LD $t0,0($a1)
213 $MULTU $t0,$a3 213 $MULTU $t0,$a3
@@ -222,7 +222,7 @@ $code.=<<___;
222 sltu $at,$t1,$at 222 sltu $at,$t1,$at
223 $ST $t1,0($a0) 223 $ST $t1,0($a0)
224 $ADDU $v0,$at 224 $ADDU $v0,$at
225 beqz $a2,.L_bn_mul_add_words_return 225 beqz $a2,.L_bn_mulw_add_words_return
226 226
227 $LD $t0,$BNSZ($a1) 227 $LD $t0,$BNSZ($a1)
228 $MULTU $t0,$a3 228 $MULTU $t0,$a3
@@ -237,7 +237,7 @@ $code.=<<___;
237 sltu $at,$t1,$at 237 sltu $at,$t1,$at
238 $ST $t1,$BNSZ($a0) 238 $ST $t1,$BNSZ($a0)
239 $ADDU $v0,$at 239 $ADDU $v0,$at
240 beqz $a2,.L_bn_mul_add_words_return 240 beqz $a2,.L_bn_mulw_add_words_return
241 241
242 $LD $t0,2*$BNSZ($a1) 242 $LD $t0,2*$BNSZ($a1)
243 $MULTU $t0,$a3 243 $MULTU $t0,$a3
@@ -252,7 +252,7 @@ $code.=<<___;
252 $ST $t1,2*$BNSZ($a0) 252 $ST $t1,2*$BNSZ($a0)
253 $ADDU $v0,$at 253 $ADDU $v0,$at
254 254
255.L_bn_mul_add_words_return: 255.L_bn_mulw_add_words_return:
256 .set noreorder 256 .set noreorder
257___ 257___
258$code.=<<___ if ($flavour =~ /nubi/i); 258$code.=<<___ if ($flavour =~ /nubi/i);
@@ -266,22 +266,22 @@ ___
266$code.=<<___; 266$code.=<<___;
267 jr $ra 267 jr $ra
268 move $a0,$v0 268 move $a0,$v0
269.end bn_mul_add_words_internal 269.end bn_mulw_add_words_internal
270 270
271.align 5 271.align 5
272.globl bn_mul_words 272.globl bn_mulw_words
273.ent bn_mul_words 273.ent bn_mulw_words
274bn_mul_words: 274bn_mulw_words:
275 .set noreorder 275 .set noreorder
276 bgtz $a2,bn_mul_words_internal 276 bgtz $a2,bn_mulw_words_internal
277 move $v0,$zero 277 move $v0,$zero
278 jr $ra 278 jr $ra
279 move $a0,$v0 279 move $a0,$v0
280.end bn_mul_words 280.end bn_mulw_words
281 281
282.align 5 282.align 5
283.ent bn_mul_words_internal 283.ent bn_mulw_words_internal
284bn_mul_words_internal: 284bn_mulw_words_internal:
285___ 285___
286$code.=<<___ if ($flavour =~ /nubi/i); 286$code.=<<___ if ($flavour =~ /nubi/i);
287 .frame $sp,6*$SZREG,$ra 287 .frame $sp,6*$SZREG,$ra
@@ -299,9 +299,9 @@ $code.=<<___;
299 .set reorder 299 .set reorder
300 li $minus4,-4 300 li $minus4,-4
301 and $ta0,$a2,$minus4 301 and $ta0,$a2,$minus4
302 beqz $ta0,.L_bn_mul_words_tail 302 beqz $ta0,.L_bn_mulw_words_tail
303 303
304.L_bn_mul_words_loop: 304.L_bn_mulw_words_loop:
305 $LD $t0,0($a1) 305 $LD $t0,0($a1)
306 $MULTU $t0,$a3 306 $MULTU $t0,$a3
307 $LD $t2,$BNSZ($a1) 307 $LD $t2,$BNSZ($a1)
@@ -341,13 +341,13 @@ $code.=<<___;
341 sltu $ta3,$v0,$at 341 sltu $ta3,$v0,$at
342 $ST $v0,-$BNSZ($a0) 342 $ST $v0,-$BNSZ($a0)
343 .set noreorder 343 .set noreorder
344 bgtz $ta0,.L_bn_mul_words_loop 344 bgtz $ta0,.L_bn_mulw_words_loop
345 $ADDU $v0,$ta3,$ta2 345 $ADDU $v0,$ta3,$ta2
346 346
347 beqz $a2,.L_bn_mul_words_return 347 beqz $a2,.L_bn_mulw_words_return
348 nop 348 nop
349 349
350.L_bn_mul_words_tail: 350.L_bn_mulw_words_tail:
351 .set reorder 351 .set reorder
352 $LD $t0,0($a1) 352 $LD $t0,0($a1)
353 $MULTU $t0,$a3 353 $MULTU $t0,$a3
@@ -358,7 +358,7 @@ $code.=<<___;
358 sltu $t1,$v0,$at 358 sltu $t1,$v0,$at
359 $ST $v0,0($a0) 359 $ST $v0,0($a0)
360 $ADDU $v0,$t1,$t0 360 $ADDU $v0,$t1,$t0
361 beqz $a2,.L_bn_mul_words_return 361 beqz $a2,.L_bn_mulw_words_return
362 362
363 $LD $t0,$BNSZ($a1) 363 $LD $t0,$BNSZ($a1)
364 $MULTU $t0,$a3 364 $MULTU $t0,$a3
@@ -369,7 +369,7 @@ $code.=<<___;
369 sltu $t1,$v0,$at 369 sltu $t1,$v0,$at
370 $ST $v0,$BNSZ($a0) 370 $ST $v0,$BNSZ($a0)
371 $ADDU $v0,$t1,$t0 371 $ADDU $v0,$t1,$t0
372 beqz $a2,.L_bn_mul_words_return 372 beqz $a2,.L_bn_mulw_words_return
373 373
374 $LD $t0,2*$BNSZ($a1) 374 $LD $t0,2*$BNSZ($a1)
375 $MULTU $t0,$a3 375 $MULTU $t0,$a3
@@ -380,7 +380,7 @@ $code.=<<___;
380 $ST $v0,2*$BNSZ($a0) 380 $ST $v0,2*$BNSZ($a0)
381 $ADDU $v0,$t1,$t0 381 $ADDU $v0,$t1,$t0
382 382
383.L_bn_mul_words_return: 383.L_bn_mulw_words_return:
384 .set noreorder 384 .set noreorder
385___ 385___
386$code.=<<___ if ($flavour =~ /nubi/i); 386$code.=<<___ if ($flavour =~ /nubi/i);
@@ -394,22 +394,22 @@ ___
394$code.=<<___; 394$code.=<<___;
395 jr $ra 395 jr $ra
396 move $a0,$v0 396 move $a0,$v0
397.end bn_mul_words_internal 397.end bn_mulw_words_internal
398 398
399.align 5 399.align 5
400.globl bn_sqr_words 400.globl bn_sqr_word_wise
401.ent bn_sqr_words 401.ent bn_sqr_word_wise
402bn_sqr_words: 402bn_sqr_word_wise:
403 .set noreorder 403 .set noreorder
404 bgtz $a2,bn_sqr_words_internal 404 bgtz $a2,bn_sqr_word_wise_internal
405 move $v0,$zero 405 move $v0,$zero
406 jr $ra 406 jr $ra
407 move $a0,$v0 407 move $a0,$v0
408.end bn_sqr_words 408.end bn_sqr_word_wise
409 409
410.align 5 410.align 5
411.ent bn_sqr_words_internal 411.ent bn_sqr_word_wise_internal
412bn_sqr_words_internal: 412bn_sqr_word_wise_internal:
413___ 413___
414$code.=<<___ if ($flavour =~ /nubi/i); 414$code.=<<___ if ($flavour =~ /nubi/i);
415 .frame $sp,6*$SZREG,$ra 415 .frame $sp,6*$SZREG,$ra
@@ -427,9 +427,9 @@ $code.=<<___;
427 .set reorder 427 .set reorder
428 li $minus4,-4 428 li $minus4,-4
429 and $ta0,$a2,$minus4 429 and $ta0,$a2,$minus4
430 beqz $ta0,.L_bn_sqr_words_tail 430 beqz $ta0,.L_bn_sqr_word_wise_tail
431 431
432.L_bn_sqr_words_loop: 432.L_bn_sqr_word_wise_loop:
433 $LD $t0,0($a1) 433 $LD $t0,0($a1)
434 $MULTU $t0,$t0 434 $MULTU $t0,$t0
435 $LD $t2,$BNSZ($a1) 435 $LD $t2,$BNSZ($a1)
@@ -463,13 +463,13 @@ $code.=<<___;
463 $ST $ta3,-2*$BNSZ($a0) 463 $ST $ta3,-2*$BNSZ($a0)
464 464
465 .set noreorder 465 .set noreorder
466 bgtz $ta0,.L_bn_sqr_words_loop 466 bgtz $ta0,.L_bn_sqr_word_wise_loop
467 $ST $ta2,-$BNSZ($a0) 467 $ST $ta2,-$BNSZ($a0)
468 468
469 beqz $a2,.L_bn_sqr_words_return 469 beqz $a2,.L_bn_sqr_word_wise_return
470 nop 470 nop
471 471
472.L_bn_sqr_words_tail: 472.L_bn_sqr_word_wise_tail:
473 .set reorder 473 .set reorder
474 $LD $t0,0($a1) 474 $LD $t0,0($a1)
475 $MULTU $t0,$t0 475 $MULTU $t0,$t0
@@ -478,7 +478,7 @@ $code.=<<___;
478 mfhi $t0 478 mfhi $t0
479 $ST $t1,0($a0) 479 $ST $t1,0($a0)
480 $ST $t0,$BNSZ($a0) 480 $ST $t0,$BNSZ($a0)
481 beqz $a2,.L_bn_sqr_words_return 481 beqz $a2,.L_bn_sqr_word_wise_return
482 482
483 $LD $t0,$BNSZ($a1) 483 $LD $t0,$BNSZ($a1)
484 $MULTU $t0,$t0 484 $MULTU $t0,$t0
@@ -487,7 +487,7 @@ $code.=<<___;
487 mfhi $t0 487 mfhi $t0
488 $ST $t1,2*$BNSZ($a0) 488 $ST $t1,2*$BNSZ($a0)
489 $ST $t0,3*$BNSZ($a0) 489 $ST $t0,3*$BNSZ($a0)
490 beqz $a2,.L_bn_sqr_words_return 490 beqz $a2,.L_bn_sqr_word_wise_return
491 491
492 $LD $t0,2*$BNSZ($a1) 492 $LD $t0,2*$BNSZ($a1)
493 $MULTU $t0,$t0 493 $MULTU $t0,$t0
@@ -496,7 +496,7 @@ $code.=<<___;
496 $ST $t1,4*$BNSZ($a0) 496 $ST $t1,4*$BNSZ($a0)
497 $ST $t0,5*$BNSZ($a0) 497 $ST $t0,5*$BNSZ($a0)
498 498
499.L_bn_sqr_words_return: 499.L_bn_sqr_word_wise_return:
500 .set noreorder 500 .set noreorder
501___ 501___
502$code.=<<___ if ($flavour =~ /nubi/i); 502$code.=<<___ if ($flavour =~ /nubi/i);
@@ -511,7 +511,7 @@ $code.=<<___;
511 jr $ra 511 jr $ra
512 move $a0,$v0 512 move $a0,$v0
513 513
514.end bn_sqr_words_internal 514.end bn_sqr_word_wise_internal
515 515
516.align 5 516.align 5
517.globl bn_add_words 517.globl bn_add_words
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl
index c9b7f9477d..9b8dc55bff 100644
--- a/src/lib/libcrypto/bn/asm/ppc.pl
+++ b/src/lib/libcrypto/bn/asm/ppc.pl
@@ -204,9 +204,9 @@ $data=<<EOF;
204# bn_sub_words 204# bn_sub_words
205# bn_add_words 205# bn_add_words
206# bn_div_words 206# bn_div_words
207# bn_sqr_words 207# bn_sqr_word_wise
208# bn_mul_words 208# bn_mulw_words
209# bn_mul_add_words 209# bn_mulw_add_words
210# 210#
211# NOTE: It is possible to optimize this code more for 211# NOTE: It is possible to optimize this code more for
212# specific PowerPC or Power architectures. On the Northstar 212# specific PowerPC or Power architectures. On the Northstar
@@ -248,9 +248,9 @@ $data=<<EOF;
248 .globl .bn_sub_words 248 .globl .bn_sub_words
249 .globl .bn_add_words 249 .globl .bn_add_words
250 .globl .bn_div_words 250 .globl .bn_div_words
251 .globl .bn_sqr_words 251 .globl .bn_sqr_word_wise
252 .globl .bn_mul_words 252 .globl .bn_mulw_words
253 .globl .bn_mul_add_words 253 .globl .bn_mulw_add_words
254 254
255# .text section 255# .text section
256 256
@@ -1702,16 +1702,16 @@ Lppcasm_div9:
1702 1702
1703# 1703#
1704# NOTE: The following label name should be changed to 1704# NOTE: The following label name should be changed to
1705# "bn_sqr_words" i.e. remove the first dot 1705# "bn_sqr_word_wise" i.e. remove the first dot
1706# for the gcc compiler. This should be automatically 1706# for the gcc compiler. This should be automatically
1707# done in the build 1707# done in the build
1708# 1708#
1709.align 4 1709.align 4
1710.bn_sqr_words: 1710.bn_sqr_word_wise:
1711# 1711#
1712# Optimized version of bn_sqr_words 1712# Optimized version of bn_sqr_word_wise
1713# 1713#
1714# void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) 1714# void bn_sqr_word_wise(BN_ULONG *r, BN_ULONG *a, int n)
1715# 1715#
1716# r3 = r 1716# r3 = r
1717# r4 = a 1717# r4 = a
@@ -1740,15 +1740,15 @@ Lppcasm_sqr_adios:
1740 1740
1741# 1741#
1742# NOTE: The following label name should be changed to 1742# NOTE: The following label name should be changed to
1743# "bn_mul_words" i.e. remove the first dot 1743# "bn_mulw_words" i.e. remove the first dot
1744# for the gcc compiler. This should be automatically 1744# for the gcc compiler. This should be automatically
1745# done in the build 1745# done in the build
1746# 1746#
1747 1747
1748.align 4 1748.align 4
1749.bn_mul_words: 1749.bn_mulw_words:
1750# 1750#
1751# BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 1751# BN_ULONG bn_mulw_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
1752# 1752#
1753# r3 = rp 1753# r3 = rp
1754# r4 = ap 1754# r4 = ap
@@ -1842,15 +1842,15 @@ Lppcasm_mw_OVER:
1842 1842
1843# 1843#
1844# NOTE: The following label name should be changed to 1844# NOTE: The following label name should be changed to
1845# "bn_mul_add_words" i.e. remove the first dot 1845# "bn_mulw_add_words" i.e. remove the first dot
1846# for the gcc compiler. This should be automatically 1846# for the gcc compiler. This should be automatically
1847# done in the build 1847# done in the build
1848# 1848#
1849 1849
1850.align 4 1850.align 4
1851.bn_mul_add_words: 1851.bn_mulw_add_words:
1852# 1852#
1853# BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 1853# BN_ULONG bn_mulw_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
1854# 1854#
1855# r3 = rp 1855# r3 = rp
1856# r4 = ap 1856# r4 = ap
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl
index 6524651748..3be440f11f 100755
--- a/src/lib/libcrypto/bn/asm/x86-mont.pl
+++ b/src/lib/libcrypto/bn/asm/x86-mont.pl
@@ -32,8 +32,7 @@ require "x86asm.pl";
32 32
33&asm_init($ARGV[0],$0); 33&asm_init($ARGV[0],$0);
34 34
35$sse2=0; 35$sse2=1;
36for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
37 36
38&external_label("OPENSSL_ia32cap_P") if ($sse2); 37&external_label("OPENSSL_ia32cap_P") if ($sse2);
39 38
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h
index 7c3c0b142f..3f9e24a868 100644
--- a/src/lib/libcrypto/bn/bn.h
+++ b/src/lib/libcrypto/bn/bn.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn.h,v 1.80 2025/03/09 15:22:40 tb Exp $ */ 1/* $OpenBSD: bn.h,v 1.85 2025/12/05 17:25:55 tb Exp $ */
2/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -125,6 +125,8 @@
125#ifndef HEADER_BN_H 125#ifndef HEADER_BN_H
126#define HEADER_BN_H 126#define HEADER_BN_H
127 127
128#include <inttypes.h>
129#include <stdint.h>
128#include <stdio.h> 130#include <stdio.h>
129#include <stdlib.h> 131#include <stdlib.h>
130 132
@@ -138,59 +140,17 @@
138extern "C" { 140extern "C" {
139#endif 141#endif
140 142
141/* This next option uses the C libraries (2 word)/(1 word) function. 143#if defined(_LP64) || defined(_WIN64)
142 * If it is not defined, I use my C version (which is slower).
143 * The reason for this flag is that when the particular C compiler
144 * library routine is used, and the library is linked with a different
145 * compiler, the library is missing. This mostly happens when the
146 * library is built with gcc and then linked using normal cc. This would
147 * be a common occurrence because gcc normally produces code that is
148 * 2 times faster than system compilers for the big number stuff.
149 * For machines with only one compiler (or shared libraries), this should
150 * be on. Again this in only really a problem on machines
151 * using "long long's", are 32bit, and are not using my assembler code. */
152/* #define BN_DIV2W */
153
154#ifdef _LP64
155#undef BN_LLONG 144#undef BN_LLONG
156#define BN_ULONG unsigned long 145#define BN_ULONG uint64_t
157#define BN_LONG long
158#define BN_BITS 128
159#define BN_BYTES 8 146#define BN_BYTES 8
160#define BN_BITS2 64 147#define BN_BITS2 64
161#define BN_BITS4 32
162#define BN_MASK2 (0xffffffffffffffffL)
163#define BN_MASK2l (0xffffffffL)
164#define BN_MASK2h (0xffffffff00000000L)
165#define BN_MASK2h1 (0xffffffff80000000L)
166#define BN_TBIT (0x8000000000000000L)
167#define BN_DEC_CONV (10000000000000000000UL)
168#define BN_DEC_FMT1 "%lu"
169#define BN_DEC_FMT2 "%019lu"
170#define BN_DEC_NUM 19
171#define BN_HEX_FMT1 "%lX"
172#define BN_HEX_FMT2 "%016lX"
173#else 148#else
174#define BN_ULLONG unsigned long long 149#define BN_ULLONG uint64_t
175#define BN_LLONG 150#define BN_LLONG
176#define BN_ULONG unsigned int 151#define BN_ULONG uint32_t
177#define BN_LONG int
178#define BN_BITS 64
179#define BN_BYTES 4 152#define BN_BYTES 4
180#define BN_BITS2 32 153#define BN_BITS2 32
181#define BN_BITS4 16
182#define BN_MASK (0xffffffffffffffffLL)
183#define BN_MASK2 (0xffffffffL)
184#define BN_MASK2l (0xffff)
185#define BN_MASK2h1 (0xffff8000L)
186#define BN_MASK2h (0xffff0000L)
187#define BN_TBIT (0x80000000L)
188#define BN_DEC_CONV (1000000000L)
189#define BN_DEC_FMT1 "%u"
190#define BN_DEC_FMT2 "%09u"
191#define BN_DEC_NUM 9
192#define BN_HEX_FMT1 "%X"
193#define BN_HEX_FMT2 "%08X"
194#endif 154#endif
195 155
196#define BN_FLG_MALLOCED 0x01 156#define BN_FLG_MALLOCED 0x01
diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c
index 86768a312a..81fa60e429 100644
--- a/src/lib/libcrypto/bn/bn_add.c
+++ b/src/lib/libcrypto/bn/bn_add.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_add.c,v 1.26 2023/07/08 12:21:58 beck Exp $ */ 1/* $OpenBSD: bn_add.c,v 1.29 2025/05/25 04:53:05 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -60,44 +60,10 @@
60#include <limits.h> 60#include <limits.h>
61#include <stdio.h> 61#include <stdio.h>
62 62
63#include <openssl/err.h>
64
65#include "bn_arch.h" 63#include "bn_arch.h"
66#include "bn_local.h" 64#include "bn_local.h"
67#include "bn_internal.h" 65#include "bn_internal.h"
68 66#include "err_local.h"
69/*
70 * bn_add_words() computes (carry:r[i]) = a[i] + b[i] + carry, where a and b
71 * are both arrays of words. Any carry resulting from the addition is returned.
72 */
73#ifndef HAVE_BN_ADD_WORDS
74BN_ULONG
75bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
76{
77 BN_ULONG carry = 0;
78
79 assert(n >= 0);
80 if (n <= 0)
81 return 0;
82
83 while (n & ~3) {
84 bn_qwaddqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0],
85 carry, &carry, &r[3], &r[2], &r[1], &r[0]);
86 a += 4;
87 b += 4;
88 r += 4;
89 n -= 4;
90 }
91 while (n) {
92 bn_addw_addw(a[0], b[0], carry, &carry, &r[0]);
93 a++;
94 b++;
95 r++;
96 n--;
97 }
98 return carry;
99}
100#endif
101 67
102/* 68/*
103 * bn_add() computes (carry:r[i]) = a[i] + b[i] + carry, where a and b are both 69 * bn_add() computes (carry:r[i]) = a[i] + b[i] + carry, where a and b are both
@@ -147,40 +113,6 @@ bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
147#endif 113#endif
148 114
149/* 115/*
150 * bn_sub_words() computes (borrow:r[i]) = a[i] - b[i] - borrow, where a and b
151 * are both arrays of words. Any borrow resulting from the subtraction is
152 * returned.
153 */
154#ifndef HAVE_BN_SUB_WORDS
155BN_ULONG
156bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
157{
158 BN_ULONG borrow = 0;
159
160 assert(n >= 0);
161 if (n <= 0)
162 return 0;
163
164 while (n & ~3) {
165 bn_qwsubqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0],
166 borrow, &borrow, &r[3], &r[2], &r[1], &r[0]);
167 a += 4;
168 b += 4;
169 r += 4;
170 n -= 4;
171 }
172 while (n) {
173 bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]);
174 a++;
175 b++;
176 r++;
177 n--;
178 }
179 return borrow;
180}
181#endif
182
183/*
184 * bn_sub() computes (borrow:r[i]) = a[i] - b[i] - borrow, where a and b are both 116 * bn_sub() computes (borrow:r[i]) = a[i] - b[i] - borrow, where a and b are both
185 * arrays of words (r may be the same as a or b). The length of a and b may 117 * arrays of words (r may be the same as a or b). The length of a and b may
186 * differ, while r must be at least max(a_len, b_len) in length. Any borrow 118 * differ, while r must be at least max(a_len, b_len) in length. Any borrow
@@ -208,7 +140,7 @@ bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
208 /* XXX - consider doing four at a time to match bn_sub_words. */ 140 /* XXX - consider doing four at a time to match bn_sub_words. */
209 while (diff_len < 0) { 141 while (diff_len < 0) {
210 /* Compute r[0] = 0 - b[0] - borrow. */ 142 /* Compute r[0] = 0 - b[0] - borrow. */
211 bn_subw(0 - b[0], borrow, &borrow, &r[0]); 143 bn_subw_subw(0, b[0], borrow, &borrow, &r[0]);
212 diff_len++; 144 diff_len++;
213 b++; 145 b++;
214 r++; 146 r++;
@@ -217,7 +149,7 @@ bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
217 /* XXX - consider doing four at a time to match bn_sub_words. */ 149 /* XXX - consider doing four at a time to match bn_sub_words. */
218 while (diff_len > 0) { 150 while (diff_len > 0) {
219 /* Compute r[0] = a[0] - 0 - borrow. */ 151 /* Compute r[0] = a[0] - 0 - borrow. */
220 bn_subw(a[0], borrow, &borrow, &r[0]); 152 bn_subw_subw(a[0], 0, borrow, &borrow, &r[0]);
221 diff_len--; 153 diff_len--;
222 a++; 154 a++;
223 r++; 155 r++;
diff --git a/src/lib/libcrypto/bn/bn_add_sub.c b/src/lib/libcrypto/bn/bn_add_sub.c
new file mode 100644
index 0000000000..5c9d5a2b1a
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_add_sub.c
@@ -0,0 +1,178 @@
1/* $OpenBSD: bn_add_sub.c,v 1.1 2025/05/25 04:30:55 jsing Exp $ */
2/*
3 * Copyright (c) 2023,2024,2025 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#include "bn_internal.h"
21
22/*
23 * bn_add_words() computes (carry:r[i]) = a[i] + b[i] + carry, where a and b
24 * are both arrays of words. Any carry resulting from the addition is returned.
25 */
26#ifndef HAVE_BN_ADD_WORDS
27BN_ULONG
28bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
29{
30 BN_ULONG carry = 0;
31
32 while (n >= 4) {
33 bn_qwaddqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0],
34 carry, &carry, &r[3], &r[2], &r[1], &r[0]);
35 a += 4;
36 b += 4;
37 r += 4;
38 n -= 4;
39 }
40 while (n > 0) {
41 bn_addw_addw(a[0], b[0], carry, &carry, &r[0]);
42 a++;
43 b++;
44 r++;
45 n--;
46 }
47
48 return carry;
49}
50#endif
51
52/*
53 * bn_sub_words() computes (borrow:r[i]) = a[i] - b[i] - borrow, where a and b
54 * are both arrays of words. Any borrow resulting from the subtraction is
55 * returned.
56 */
57#ifndef HAVE_BN_SUB_WORDS
58BN_ULONG
59bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
60{
61 BN_ULONG borrow = 0;
62
63 while (n >= 4) {
64 bn_qwsubqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0],
65 borrow, &borrow, &r[3], &r[2], &r[1], &r[0]);
66 a += 4;
67 b += 4;
68 r += 4;
69 n -= 4;
70 }
71 while (n > 0) {
72 bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]);
73 a++;
74 b++;
75 r++;
76 n--;
77 }
78
79 return borrow;
80}
81#endif
82
83/*
84 * bn_sub_words_borrow() computes a[i] - b[i], returning the resulting borrow only.
85 */
86#ifndef HAVE_BN_SUB_WORDS_BORROW
87BN_ULONG
88bn_sub_words_borrow(const BN_ULONG *a, const BN_ULONG *b, size_t n)
89{
90 BN_ULONG borrow = 0;
91 BN_ULONG r;
92
93 while (n >= 4) {
94 bn_qwsubqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0],
95 borrow, &borrow, &r, &r, &r, &r);
96 a += 4;
97 b += 4;
98 n -= 4;
99 }
100 while (n > 0) {
101 bn_subw_subw(a[0], b[0], borrow, &borrow, &r);
102 a++;
103 b++;
104 n--;
105 }
106
107 return borrow;
108}
109#endif
110
111/*
112 * bn_add_words_masked() computes r[] = a[] + (b[] & mask), where a, b and r are
113 * arrays of words with length n (r may be the same as a or b).
114 */
115#ifndef HAVE_BN_ADD_WORDS_MASKED
116BN_ULONG
117bn_add_words_masked(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
118 BN_ULONG mask, size_t n)
119{
120 BN_ULONG carry = 0;
121
122 /* XXX - consider conditional/masked versions of bn_addw_addw/bn_qwaddqw. */
123
124 while (n >= 4) {
125 bn_qwaddqw(a[3], a[2], a[1], a[0], b[3] & mask, b[2] & mask,
126 b[1] & mask, b[0] & mask, carry, &carry, &r[3], &r[2],
127 &r[1], &r[0]);
128 a += 4;
129 b += 4;
130 r += 4;
131 n -= 4;
132 }
133 while (n > 0) {
134 bn_addw_addw(a[0], b[0] & mask, carry, &carry, &r[0]);
135 a++;
136 b++;
137 r++;
138 n--;
139 }
140
141 return carry;
142}
143#endif
144
145/*
146 * bn_sub_words_masked() computes r[] = a[] - (b[] & mask), where a, b and r are
147 * arrays of words with length n (r may be the same as a or b).
148 */
149#ifndef HAVE_BN_SUB_WORDS_MASKED
150BN_ULONG
151bn_sub_words_masked(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
152 BN_ULONG mask, size_t n)
153{
154 BN_ULONG borrow = 0;
155
156 /* XXX - consider conditional/masked versions of bn_subw_subw/bn_qwsubqw. */
157
158 /* Compute conditional r[i] = a[i] - b[i]. */
159 while (n >= 4) {
160 bn_qwsubqw(a[3], a[2], a[1], a[0], b[3] & mask, b[2] & mask,
161 b[1] & mask, b[0] & mask, borrow, &borrow, &r[3], &r[2],
162 &r[1], &r[0]);
163 a += 4;
164 b += 4;
165 r += 4;
166 n -= 4;
167 }
168 while (n > 0) {
169 bn_subw_subw(a[0], b[0] & mask, borrow, &borrow, &r[0]);
170 a++;
171 b++;
172 r++;
173 n--;
174 }
175
176 return borrow;
177}
178#endif
diff --git a/src/lib/libcrypto/bn/bn_const.c b/src/lib/libcrypto/bn/bn_const.c
index bf684c8a46..389e95ca15 100644
--- a/src/lib/libcrypto/bn/bn_const.c
+++ b/src/lib/libcrypto/bn/bn_const.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_const.c,v 1.8 2023/07/28 10:07:30 tb Exp $ */ 1/* $OpenBSD: bn_const.c,v 1.9 2026/01/23 08:29:04 tb Exp $ */
2/* Insert boilerplate */ 2/* Insert boilerplate */
3 3
4#include <openssl/bn.h> 4#include <openssl/bn.h>
@@ -431,3 +431,295 @@ BN_get_rfc3526_prime_8192(BIGNUM *bn)
431 return BN_bin2bn(RFC3526_PRIME_8192, sizeof(RFC3526_PRIME_8192), bn); 431 return BN_bin2bn(RFC3526_PRIME_8192, sizeof(RFC3526_PRIME_8192), bn);
432} 432}
433LCRYPTO_ALIAS(BN_get_rfc3526_prime_8192); 433LCRYPTO_ALIAS(BN_get_rfc3526_prime_8192);
434
435static const unsigned char RFC7919_PRIME_2048[] = {
436 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAD, 0xF8, 0x54, 0x58,
437 0xA2, 0xBB, 0x4A, 0x9A, 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
438 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, 0xA9, 0xE1, 0x36, 0x41,
439 0x14, 0x64, 0x33, 0xFB, 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
440 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, 0xF6, 0x81, 0xB2, 0x02,
441 0xAE, 0xC4, 0x61, 0x7A, 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
442 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, 0x85, 0x63, 0x65, 0x55,
443 0x3D, 0xED, 0x1A, 0xF3, 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
444 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, 0xE2, 0xA6, 0x89, 0xDA,
445 0xF3, 0xEF, 0xE8, 0x72, 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
446 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, 0xBC, 0x0A, 0xB1, 0x82,
447 0xB3, 0x24, 0xFB, 0x61, 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
448 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, 0x1D, 0x4F, 0x42, 0xA3,
449 0xDE, 0x39, 0x4D, 0xF4, 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
450 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, 0x9E, 0x02, 0xFC, 0xE1,
451 0xCD, 0xF7, 0xE2, 0xEC, 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
452 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, 0x8E, 0x4F, 0x12, 0x32,
453 0xEE, 0xF2, 0x81, 0x83, 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
454 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, 0xC5, 0x8E, 0xF1, 0x83,
455 0x7D, 0x16, 0x83, 0xB2, 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
456 0x88, 0x6B, 0x42, 0x38, 0x61, 0x28, 0x5C, 0x97, 0xFF, 0xFF, 0xFF, 0xFF,
457 0xFF, 0xFF, 0xFF, 0xFF,
458};
459
/*
 * BN_get_rfc7919_prime_2048() passes the RFC7919_PRIME_2048 byte table and
 * its sizeof to BN_bin2bn() together with the caller-supplied bn, and returns
 * whatever BN_bin2bn() returns.
 *
 * NOTE(review): unlike BN_get_rfc3526_prime_8192() earlier in this file, this
 * function and its 3072/4096/6144/8192 siblings are not followed by an
 * LCRYPTO_ALIAS() line - presumably because they are library-internal;
 * confirm against the header that declares them.
 */
460BIGNUM *
461BN_get_rfc7919_prime_2048(BIGNUM *bn)
462{
463 return BN_bin2bn(RFC7919_PRIME_2048, sizeof(RFC7919_PRIME_2048), bn);
464}
465
466static const unsigned char RFC7919_PRIME_3072[] = {
467 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAD, 0xF8, 0x54, 0x58,
468 0xA2, 0xBB, 0x4A, 0x9A, 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
469 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, 0xA9, 0xE1, 0x36, 0x41,
470 0x14, 0x64, 0x33, 0xFB, 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
471 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, 0xF6, 0x81, 0xB2, 0x02,
472 0xAE, 0xC4, 0x61, 0x7A, 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
473 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, 0x85, 0x63, 0x65, 0x55,
474 0x3D, 0xED, 0x1A, 0xF3, 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
475 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, 0xE2, 0xA6, 0x89, 0xDA,
476 0xF3, 0xEF, 0xE8, 0x72, 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
477 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, 0xBC, 0x0A, 0xB1, 0x82,
478 0xB3, 0x24, 0xFB, 0x61, 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
479 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, 0x1D, 0x4F, 0x42, 0xA3,
480 0xDE, 0x39, 0x4D, 0xF4, 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
481 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, 0x9E, 0x02, 0xFC, 0xE1,
482 0xCD, 0xF7, 0xE2, 0xEC, 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
483 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, 0x8E, 0x4F, 0x12, 0x32,
484 0xEE, 0xF2, 0x81, 0x83, 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
485 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, 0xC5, 0x8E, 0xF1, 0x83,
486 0x7D, 0x16, 0x83, 0xB2, 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
487 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, 0xDE, 0x35, 0x5B, 0x3B,
488 0x65, 0x19, 0x03, 0x5B, 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38,
489 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, 0x7A, 0xD9, 0x1D, 0x26,
490 0x91, 0xF7, 0xF7, 0xEE, 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C,
491 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, 0xB4, 0x13, 0x0C, 0x93,
492 0xBC, 0x43, 0x79, 0x44, 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3,
493 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, 0x5C, 0xAE, 0x82, 0xAB,
494 0x9C, 0x9D, 0xF6, 0x9E, 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D,
495 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, 0x1D, 0xBF, 0x9A, 0x42,
496 0xD5, 0xC4, 0x48, 0x4E, 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF,
497 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, 0x25, 0xE4, 0x1D, 0x2B,
498 0x66, 0xC6, 0x2E, 0x37, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
499};
500
/*
 * BN_get_rfc7919_prime_3072() passes the RFC7919_PRIME_3072 byte table and
 * its sizeof to BN_bin2bn() together with the caller-supplied bn, and returns
 * whatever BN_bin2bn() returns.
 */
501BIGNUM *
502BN_get_rfc7919_prime_3072(BIGNUM *bn)
503{
504 return BN_bin2bn(RFC7919_PRIME_3072, sizeof(RFC7919_PRIME_3072), bn);
505}
506
507static const unsigned char RFC7919_PRIME_4096[] = {
508 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAD, 0xF8, 0x54, 0x58,
509 0xA2, 0xBB, 0x4A, 0x9A, 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
510 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, 0xA9, 0xE1, 0x36, 0x41,
511 0x14, 0x64, 0x33, 0xFB, 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
512 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, 0xF6, 0x81, 0xB2, 0x02,
513 0xAE, 0xC4, 0x61, 0x7A, 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
514 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, 0x85, 0x63, 0x65, 0x55,
515 0x3D, 0xED, 0x1A, 0xF3, 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
516 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, 0xE2, 0xA6, 0x89, 0xDA,
517 0xF3, 0xEF, 0xE8, 0x72, 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
518 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, 0xBC, 0x0A, 0xB1, 0x82,
519 0xB3, 0x24, 0xFB, 0x61, 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
520 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, 0x1D, 0x4F, 0x42, 0xA3,
521 0xDE, 0x39, 0x4D, 0xF4, 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
522 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, 0x9E, 0x02, 0xFC, 0xE1,
523 0xCD, 0xF7, 0xE2, 0xEC, 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
524 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, 0x8E, 0x4F, 0x12, 0x32,
525 0xEE, 0xF2, 0x81, 0x83, 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
526 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, 0xC5, 0x8E, 0xF1, 0x83,
527 0x7D, 0x16, 0x83, 0xB2, 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
528 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, 0xDE, 0x35, 0x5B, 0x3B,
529 0x65, 0x19, 0x03, 0x5B, 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38,
530 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, 0x7A, 0xD9, 0x1D, 0x26,
531 0x91, 0xF7, 0xF7, 0xEE, 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C,
532 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, 0xB4, 0x13, 0x0C, 0x93,
533 0xBC, 0x43, 0x79, 0x44, 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3,
534 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, 0x5C, 0xAE, 0x82, 0xAB,
535 0x9C, 0x9D, 0xF6, 0x9E, 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D,
536 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, 0x1D, 0xBF, 0x9A, 0x42,
537 0xD5, 0xC4, 0x48, 0x4E, 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF,
538 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, 0x25, 0xE4, 0x1D, 0x2B,
539 0x66, 0x9E, 0x1E, 0xF1, 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB,
540 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, 0xAC, 0x7D, 0x5F, 0x42,
541 0xD6, 0x9F, 0x6D, 0x18, 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04,
542 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, 0x71, 0x35, 0xC8, 0x86,
543 0xEF, 0xB4, 0x31, 0x8A, 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32,
544 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, 0x6D, 0xC7, 0x78, 0xF9,
545 0x71, 0xAD, 0x00, 0x38, 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A,
546 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, 0x2A, 0x4E, 0xCE, 0xA9,
547 0xF9, 0x8D, 0x0A, 0xCC, 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF,
548 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, 0x4D, 0xB5, 0xA8, 0x51,
549 0xF4, 0x41, 0x82, 0xE1, 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x65, 0x5F, 0x6A,
550 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
551};
552
/*
 * BN_get_rfc7919_prime_4096() passes the RFC7919_PRIME_4096 byte table and
 * its sizeof to BN_bin2bn() together with the caller-supplied bn, and returns
 * whatever BN_bin2bn() returns.
 */
553BIGNUM *
554BN_get_rfc7919_prime_4096(BIGNUM *bn)
555{
556 return BN_bin2bn(RFC7919_PRIME_4096, sizeof(RFC7919_PRIME_4096), bn);
557}
558
559static const unsigned char RFC7919_PRIME_6144[] = {
560 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAD, 0xF8, 0x54, 0x58,
561 0xA2, 0xBB, 0x4A, 0x9A, 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
562 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, 0xA9, 0xE1, 0x36, 0x41,
563 0x14, 0x64, 0x33, 0xFB, 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
564 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, 0xF6, 0x81, 0xB2, 0x02,
565 0xAE, 0xC4, 0x61, 0x7A, 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
566 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, 0x85, 0x63, 0x65, 0x55,
567 0x3D, 0xED, 0x1A, 0xF3, 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
568 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, 0xE2, 0xA6, 0x89, 0xDA,
569 0xF3, 0xEF, 0xE8, 0x72, 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
570 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, 0xBC, 0x0A, 0xB1, 0x82,
571 0xB3, 0x24, 0xFB, 0x61, 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
572 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, 0x1D, 0x4F, 0x42, 0xA3,
573 0xDE, 0x39, 0x4D, 0xF4, 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
574 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, 0x9E, 0x02, 0xFC, 0xE1,
575 0xCD, 0xF7, 0xE2, 0xEC, 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
576 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, 0x8E, 0x4F, 0x12, 0x32,
577 0xEE, 0xF2, 0x81, 0x83, 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
578 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, 0xC5, 0x8E, 0xF1, 0x83,
579 0x7D, 0x16, 0x83, 0xB2, 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
580 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, 0xDE, 0x35, 0x5B, 0x3B,
581 0x65, 0x19, 0x03, 0x5B, 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38,
582 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, 0x7A, 0xD9, 0x1D, 0x26,
583 0x91, 0xF7, 0xF7, 0xEE, 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C,
584 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, 0xB4, 0x13, 0x0C, 0x93,
585 0xBC, 0x43, 0x79, 0x44, 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3,
586 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, 0x5C, 0xAE, 0x82, 0xAB,
587 0x9C, 0x9D, 0xF6, 0x9E, 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D,
588 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, 0x1D, 0xBF, 0x9A, 0x42,
589 0xD5, 0xC4, 0x48, 0x4E, 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF,
590 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, 0x25, 0xE4, 0x1D, 0x2B,
591 0x66, 0x9E, 0x1E, 0xF1, 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB,
592 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, 0xAC, 0x7D, 0x5F, 0x42,
593 0xD6, 0x9F, 0x6D, 0x18, 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04,
594 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, 0x71, 0x35, 0xC8, 0x86,
595 0xEF, 0xB4, 0x31, 0x8A, 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32,
596 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, 0x6D, 0xC7, 0x78, 0xF9,
597 0x71, 0xAD, 0x00, 0x38, 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A,
598 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, 0x2A, 0x4E, 0xCE, 0xA9,
599 0xF9, 0x8D, 0x0A, 0xCC, 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF,
600 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, 0x4D, 0xB5, 0xA8, 0x51,
601 0xF4, 0x41, 0x82, 0xE1, 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02,
602 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A, 0x4E, 0x67, 0x7D, 0x2C,
603 0x38, 0x53, 0x2A, 0x3A, 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6,
604 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8, 0x91, 0x7B, 0xDD, 0x64,
605 0xB1, 0xC0, 0xFD, 0x4C, 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A,
606 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71, 0x9B, 0x1F, 0x5C, 0x3E,
607 0x4E, 0x46, 0x04, 0x1F, 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77,
608 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10, 0xB8, 0x55, 0x32, 0x2E,
609 0xDB, 0x63, 0x40, 0xD8, 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3,
610 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E, 0x7F, 0xB2, 0x9F, 0x8C,
611 0x18, 0x30, 0x23, 0xC3, 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4,
612 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1, 0x94, 0xC6, 0x65, 0x1E,
613 0x77, 0xCA, 0xF9, 0x92, 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6,
614 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82, 0x0A, 0xE8, 0xDB, 0x58,
615 0x47, 0xA6, 0x7C, 0xBE, 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C,
616 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E, 0x62, 0x29, 0x2C, 0x31,
617 0x15, 0x62, 0xA8, 0x46, 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A,
618 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17, 0x8C, 0xCF, 0x2D, 0xD5,
619 0xCA, 0xCE, 0xF4, 0x03, 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04,
620 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6, 0x3F, 0xDD, 0x4A, 0x8E,
621 0x9A, 0xDB, 0x1E, 0x69, 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1,
622 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4, 0xA4, 0x0E, 0x32, 0x9C,
623 0xD0, 0xE4, 0x0E, 0x65, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
624};
625
/*
 * BN_get_rfc7919_prime_6144() passes the RFC7919_PRIME_6144 byte table and
 * its sizeof to BN_bin2bn() together with the caller-supplied bn, and returns
 * whatever BN_bin2bn() returns.
 */
626BIGNUM *
627BN_get_rfc7919_prime_6144(BIGNUM *bn)
628{
629 return BN_bin2bn(RFC7919_PRIME_6144, sizeof(RFC7919_PRIME_6144), bn);
630}
631
632static const unsigned char RFC7919_PRIME_8192[] = {
633 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAD, 0xF8, 0x54, 0x58,
634 0xA2, 0xBB, 0x4A, 0x9A, 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
635 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, 0xA9, 0xE1, 0x36, 0x41,
636 0x14, 0x64, 0x33, 0xFB, 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
637 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, 0xF6, 0x81, 0xB2, 0x02,
638 0xAE, 0xC4, 0x61, 0x7A, 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
639 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, 0x85, 0x63, 0x65, 0x55,
640 0x3D, 0xED, 0x1A, 0xF3, 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
641 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, 0xE2, 0xA6, 0x89, 0xDA,
642 0xF3, 0xEF, 0xE8, 0x72, 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
643 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, 0xBC, 0x0A, 0xB1, 0x82,
644 0xB3, 0x24, 0xFB, 0x61, 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
645 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, 0x1D, 0x4F, 0x42, 0xA3,
646 0xDE, 0x39, 0x4D, 0xF4, 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
647 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, 0x9E, 0x02, 0xFC, 0xE1,
648 0xCD, 0xF7, 0xE2, 0xEC, 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
649 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, 0x8E, 0x4F, 0x12, 0x32,
650 0xEE, 0xF2, 0x81, 0x83, 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
651 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, 0xC5, 0x8E, 0xF1, 0x83,
652 0x7D, 0x16, 0x83, 0xB2, 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
653 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, 0xDE, 0x35, 0x5B, 0x3B,
654 0x65, 0x19, 0x03, 0x5B, 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38,
655 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, 0x7A, 0xD9, 0x1D, 0x26,
656 0x91, 0xF7, 0xF7, 0xEE, 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C,
657 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, 0xB4, 0x13, 0x0C, 0x93,
658 0xBC, 0x43, 0x79, 0x44, 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3,
659 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, 0x5C, 0xAE, 0x82, 0xAB,
660 0x9C, 0x9D, 0xF6, 0x9E, 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D,
661 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, 0x1D, 0xBF, 0x9A, 0x42,
662 0xD5, 0xC4, 0x48, 0x4E, 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF,
663 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, 0x25, 0xE4, 0x1D, 0x2B,
664 0x66, 0x9E, 0x1E, 0xF1, 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB,
665 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, 0xAC, 0x7D, 0x5F, 0x42,
666 0xD6, 0x9F, 0x6D, 0x18, 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04,
667 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, 0x71, 0x35, 0xC8, 0x86,
668 0xEF, 0xB4, 0x31, 0x8A, 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32,
669 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, 0x6D, 0xC7, 0x78, 0xF9,
670 0x71, 0xAD, 0x00, 0x38, 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A,
671 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, 0x2A, 0x4E, 0xCE, 0xA9,
672 0xF9, 0x8D, 0x0A, 0xCC, 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF,
673 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, 0x4D, 0xB5, 0xA8, 0x51,
674 0xF4, 0x41, 0x82, 0xE1, 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02,
675 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A, 0x4E, 0x67, 0x7D, 0x2C,
676 0x38, 0x53, 0x2A, 0x3A, 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6,
677 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8, 0x91, 0x7B, 0xDD, 0x64,
678 0xB1, 0xC0, 0xFD, 0x4C, 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A,
679 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71, 0x9B, 0x1F, 0x5C, 0x3E,
680 0x4E, 0x46, 0x04, 0x1F, 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77,
681 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10, 0xB8, 0x55, 0x32, 0x2E,
682 0xDB, 0x63, 0x40, 0xD8, 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3,
683 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E, 0x7F, 0xB2, 0x9F, 0x8C,
684 0x18, 0x30, 0x23, 0xC3, 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4,
685 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1, 0x94, 0xC6, 0x65, 0x1E,
686 0x77, 0xCA, 0xF9, 0x92, 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6,
687 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82, 0x0A, 0xE8, 0xDB, 0x58,
688 0x47, 0xA6, 0x7C, 0xBE, 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C,
689 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E, 0x62, 0x29, 0x2C, 0x31,
690 0x15, 0x62, 0xA8, 0x46, 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A,
691 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17, 0x8C, 0xCF, 0x2D, 0xD5,
692 0xCA, 0xCE, 0xF4, 0x03, 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04,
693 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6, 0x3F, 0xDD, 0x4A, 0x8E,
694 0x9A, 0xDB, 0x1E, 0x69, 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1,
695 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4, 0xA4, 0x0E, 0x32, 0x9C,
696 0xCF, 0xF4, 0x6A, 0xAA, 0x36, 0xAD, 0x00, 0x4C, 0xF6, 0x00, 0xC8, 0x38,
697 0x1E, 0x42, 0x5A, 0x31, 0xD9, 0x51, 0xAE, 0x64, 0xFD, 0xB2, 0x3F, 0xCE,
698 0xC9, 0x50, 0x9D, 0x43, 0x68, 0x7F, 0xEB, 0x69, 0xED, 0xD1, 0xCC, 0x5E,
699 0x0B, 0x8C, 0xC3, 0xBD, 0xF6, 0x4B, 0x10, 0xEF, 0x86, 0xB6, 0x31, 0x42,
700 0xA3, 0xAB, 0x88, 0x29, 0x55, 0x5B, 0x2F, 0x74, 0x7C, 0x93, 0x26, 0x65,
701 0xCB, 0x2C, 0x0F, 0x1C, 0xC0, 0x1B, 0xD7, 0x02, 0x29, 0x38, 0x88, 0x39,
702 0xD2, 0xAF, 0x05, 0xE4, 0x54, 0x50, 0x4A, 0xC7, 0x8B, 0x75, 0x82, 0x82,
703 0x28, 0x46, 0xC0, 0xBA, 0x35, 0xC3, 0x5F, 0x5C, 0x59, 0x16, 0x0C, 0xC0,
704 0x46, 0xFD, 0x82, 0x51, 0x54, 0x1F, 0xC6, 0x8C, 0x9C, 0x86, 0xB0, 0x22,
705 0xBB, 0x70, 0x99, 0x87, 0x6A, 0x46, 0x0E, 0x74, 0x51, 0xA8, 0xA9, 0x31,
706 0x09, 0x70, 0x3F, 0xEE, 0x1C, 0x21, 0x7E, 0x6C, 0x38, 0x26, 0xE5, 0x2C,
707 0x51, 0xAA, 0x69, 0x1E, 0x0E, 0x42, 0x3C, 0xFC, 0x99, 0xE9, 0xE3, 0x16,
708 0x50, 0xC1, 0x21, 0x7B, 0x62, 0x48, 0x16, 0xCD, 0xAD, 0x9A, 0x95, 0xF9,
709 0xD5, 0xB8, 0x01, 0x94, 0x88, 0xD9, 0xC0, 0xA0, 0xA1, 0xFE, 0x30, 0x75,
710 0xA5, 0x77, 0xE2, 0x31, 0x83, 0xF8, 0x1D, 0x4A, 0x3F, 0x2F, 0xA4, 0x57,
711 0x1E, 0xFC, 0x8C, 0xE0, 0xBA, 0x8A, 0x4F, 0xE8, 0xB6, 0x85, 0x5D, 0xFE,
712 0x72, 0xB0, 0xA6, 0x6E, 0xDE, 0xD2, 0xFB, 0xAB, 0xFB, 0xE5, 0x8A, 0x30,
713 0xFA, 0xFA, 0xBE, 0x1C, 0x5D, 0x71, 0xA8, 0x7E, 0x2F, 0x74, 0x1E, 0xF8,
714 0xC1, 0xFE, 0x86, 0xFE, 0xA6, 0xBB, 0xFD, 0xE5, 0x30, 0x67, 0x7F, 0x0D,
715 0x97, 0xD1, 0x1D, 0x49, 0xF7, 0xA8, 0x44, 0x3D, 0x08, 0x22, 0xE5, 0x06,
716 0xA9, 0xF4, 0x61, 0x4E, 0x01, 0x1E, 0x2A, 0x94, 0x83, 0x8F, 0xF8, 0x8C,
717 0xD6, 0x8C, 0x8B, 0xB7, 0xC5, 0xC6, 0x42, 0x4C, 0xFF, 0xFF, 0xFF, 0xFF,
718 0xFF, 0xFF, 0xFF, 0xFF,
719};
720
/*
 * BN_get_rfc7919_prime_8192() passes the RFC7919_PRIME_8192 byte table and
 * its sizeof to BN_bin2bn() together with the caller-supplied bn, and returns
 * whatever BN_bin2bn() returns.
 */
721BIGNUM *
722BN_get_rfc7919_prime_8192(BIGNUM *bn)
723{
724 return BN_bin2bn(RFC7919_PRIME_8192, sizeof(RFC7919_PRIME_8192), bn);
725}
diff --git a/src/lib/libcrypto/bn/bn_convert.c b/src/lib/libcrypto/bn/bn_convert.c
index 6a6354f44e..ab5bc519c8 100644
--- a/src/lib/libcrypto/bn/bn_convert.c
+++ b/src/lib/libcrypto/bn/bn_convert.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_convert.c,v 1.23 2024/11/08 14:18:44 jsing Exp $ */ 1/* $OpenBSD: bn_convert.c,v 1.25 2025/12/05 14:12:32 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -65,11 +65,19 @@
65 65
66#include <openssl/bio.h> 66#include <openssl/bio.h>
67#include <openssl/buffer.h> 67#include <openssl/buffer.h>
68#include <openssl/err.h>
69 68
70#include "bn_local.h" 69#include "bn_local.h"
71#include "bytestring.h" 70#include "bytestring.h"
72#include "crypto_internal.h" 71#include "crypto_internal.h"
72#include "err_local.h"
73
74#if BN_BYTES == 8
75#define BN_DEC_CONV UINT64_C(10000000000000000000)
76#define BN_DEC_NUM 19
77#else
78#define BN_DEC_CONV UINT32_C(1000000000)
79#define BN_DEC_NUM 9
80#endif
73 81
74static int bn_dec2bn_cbs(BIGNUM **bnp, CBS *cbs); 82static int bn_dec2bn_cbs(BIGNUM **bnp, CBS *cbs);
75static int bn_hex2bn_cbs(BIGNUM **bnp, CBS *cbs); 83static int bn_hex2bn_cbs(BIGNUM **bnp, CBS *cbs);
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c
index 129b9c9781..eda93dcaa4 100644
--- a/src/lib/libcrypto/bn/bn_ctx.c
+++ b/src/lib/libcrypto/bn/bn_ctx.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_ctx.c,v 1.22 2023/07/08 12:21:58 beck Exp $ */ 1/* $OpenBSD: bn_ctx.c,v 1.23 2025/05/10 05:54:38 tb Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -19,9 +19,9 @@
19#include <string.h> 19#include <string.h>
20 20
21#include <openssl/opensslconf.h> 21#include <openssl/opensslconf.h>
22#include <openssl/err.h>
23 22
24#include "bn_local.h" 23#include "bn_local.h"
24#include "err_local.h"
25 25
26#define BN_CTX_INITIAL_LEN 8 26#define BN_CTX_INITIAL_LEN 8
27 27
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
index 09a8a364df..0a914db752 100644
--- a/src/lib/libcrypto/bn/bn_div.c
+++ b/src/lib/libcrypto/bn/bn_div.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_div.c,v 1.41 2024/04/10 14:58:06 beck Exp $ */ 1/* $OpenBSD: bn_div.c,v 1.44 2025/09/07 06:28:03 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -62,25 +62,15 @@
62#include <openssl/opensslconf.h> 62#include <openssl/opensslconf.h>
63 63
64#include <openssl/bn.h> 64#include <openssl/bn.h>
65#include <openssl/err.h>
66 65
67#include "bn_arch.h" 66#include "bn_arch.h"
68#include "bn_local.h" 67#include "bn_local.h"
69#include "bn_internal.h" 68#include "bn_internal.h"
69#include "err_local.h"
70 70
71BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0); 71BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0);
72 72
73#ifndef HAVE_BN_DIV_WORDS 73#ifndef HAVE_BN_DIV_WORDS
74#if defined(BN_LLONG) && defined(BN_DIV2W)
75
76BN_ULONG
77bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
78{
79 return ((BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2)|l)/(BN_ULLONG)d));
80}
81
82#else
83
84/* Divide h,l by d and return the result. */ 74/* Divide h,l by d and return the result. */
85/* I need to test this some more :-( */ 75/* I need to test this some more :-( */
86BN_ULONG 76BN_ULONG
@@ -148,7 +138,6 @@ bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
148 ret |= q; 138 ret |= q;
149 return (ret); 139 return (ret);
150} 140}
151#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */
152#endif 141#endif
153 142
154/* 143/*
@@ -375,7 +364,7 @@ BN_div_internal(BIGNUM *quotient, BIGNUM *remainder, const BIGNUM *numerator,
375 * | wnum - sdiv * q | < sdiv 364 * | wnum - sdiv * q | < sdiv
376 */ 365 */
377 q = bn_div_3_words(wnump, d1, d0); 366 q = bn_div_3_words(wnump, d1, d0);
378 l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q); 367 l0 = bn_mulw_words(tmp->d, sdiv->d, div_n, q);
379 tmp->d[div_n] = l0; 368 tmp->d[div_n] = l0;
380 wnum.d--; 369 wnum.d--;
381 370
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
index e925d325d2..6a5c1c857a 100644
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ b/src/lib/libcrypto/bn/bn_exp.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_exp.c,v 1.58 2025/02/13 11:15:09 tb Exp $ */ 1/* $OpenBSD: bn_exp.c,v 1.59 2025/05/10 05:54:38 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -112,10 +112,9 @@
112#include <stdlib.h> 112#include <stdlib.h>
113#include <string.h> 113#include <string.h>
114 114
115#include <openssl/err.h>
116
117#include "bn_local.h" 115#include "bn_local.h"
118#include "constant_time.h" 116#include "constant_time.h"
117#include "err_local.h"
119 118
120/* maximum precomputation table size for *variable* sliding windows */ 119/* maximum precomputation table size for *variable* sliding windows */
121#define TABLE_SIZE 32 120#define TABLE_SIZE 32
diff --git a/src/lib/libcrypto/bn/bn_gcd.c b/src/lib/libcrypto/bn/bn_gcd.c
index fa5d71a7f3..319d9ca390 100644
--- a/src/lib/libcrypto/bn/bn_gcd.c
+++ b/src/lib/libcrypto/bn/bn_gcd.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_gcd.c,v 1.29 2024/04/10 14:58:06 beck Exp $ */ 1/* $OpenBSD: bn_gcd.c,v 1.31 2025/06/02 12:40:10 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -109,9 +109,8 @@
109 * 109 *
110 */ 110 */
111 111
112#include <openssl/err.h>
113
114#include "bn_local.h" 112#include "bn_local.h"
113#include "err_local.h"
115 114
116static BIGNUM * 115static BIGNUM *
117euclid(BIGNUM *a, BIGNUM *b) 116euclid(BIGNUM *a, BIGNUM *b)
@@ -681,8 +680,10 @@ BN_mod_inverse_internal(BIGNUM *in, const BIGNUM *a, const BIGNUM *n, BN_CTX *ct
681 /* A >= 2*B, so D=2 or D=3 */ 680 /* A >= 2*B, so D=2 or D=3 */
682 if (!BN_sub(M, A, T)) 681 if (!BN_sub(M, A, T))
683 goto err; 682 goto err;
684 if (!BN_add(D,T,B)) goto err; /* use D (:= 3*B) as temp */ 683 /* use D (:= 3*B) as temp */
685 if (BN_ucmp(A, D) < 0) { 684 if (!BN_add(D, T, B))
685 goto err;
686 if (BN_ucmp(A, D) < 0) {
686 /* A < 3*B, so D=2 */ 687 /* A < 3*B, so D=2 */
687 if (!BN_set_word(D, 2)) 688 if (!BN_set_word(D, 2))
688 goto err; 689 goto err;
diff --git a/src/lib/libcrypto/bn/bn_internal.h b/src/lib/libcrypto/bn/bn_internal.h
index fd04bc9f8a..efe8202aa0 100644
--- a/src/lib/libcrypto/bn/bn_internal.h
+++ b/src/lib/libcrypto/bn/bn_internal.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_internal.h,v 1.15 2023/06/25 11:42:26 jsing Exp $ */ 1/* $OpenBSD: bn_internal.h,v 1.21 2025/12/05 14:12:32 tb Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -18,6 +18,7 @@
18#include <openssl/bn.h> 18#include <openssl/bn.h>
19 19
20#include "bn_arch.h" 20#include "bn_arch.h"
21#include "bn_local.h"
21 22
22#ifndef HEADER_BN_INTERNAL_H 23#ifndef HEADER_BN_INTERNAL_H
23#define HEADER_BN_INTERNAL_H 24#define HEADER_BN_INTERNAL_H
@@ -26,6 +27,30 @@ int bn_word_clz(BN_ULONG w);
26 27
27int bn_bitsize(const BIGNUM *bn); 28int bn_bitsize(const BIGNUM *bn);
28 29
30BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
31 int num);
32BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
33 int num);
34BN_ULONG bn_sub_words_borrow(const BN_ULONG *a, const BN_ULONG *b, size_t n);
35BN_ULONG bn_add_words_masked(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
36 BN_ULONG mask, size_t n);
37BN_ULONG bn_sub_words_masked(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
38 BN_ULONG mask, size_t n);
39void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
40 const BN_ULONG *m, size_t n);
41void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
42 const BN_ULONG *m, size_t n);
43void bn_mod_mul_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
44 const BN_ULONG *m, BN_ULONG *t, BN_ULONG m0, size_t n);
45void bn_mod_sqr_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *m,
46 BN_ULONG *t, BN_ULONG m0, size_t n);
47
48void bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap,
49 const BN_ULONG *bp, const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0,
50 int n_len);
51void bn_montgomery_reduce_words(BN_ULONG *r, BN_ULONG *a, const BN_ULONG *n,
52 BN_ULONG n0, int n_len);
53
29#ifndef HAVE_BN_CT_NE_ZERO 54#ifndef HAVE_BN_CT_NE_ZERO
30static inline int 55static inline int
31bn_ct_ne_zero(BN_ULONG w) 56bn_ct_ne_zero(BN_ULONG w)
diff --git a/src/lib/libcrypto/bn/bn_isqrt.c b/src/lib/libcrypto/bn/bn_isqrt.c
index 018d5f34bd..b725519e1a 100644
--- a/src/lib/libcrypto/bn/bn_isqrt.c
+++ b/src/lib/libcrypto/bn/bn_isqrt.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_isqrt.c,v 1.10 2023/06/04 17:28:35 tb Exp $ */ 1/* $OpenBSD: bn_isqrt.c,v 1.11 2025/05/10 05:54:38 tb Exp $ */
2/* 2/*
3 * Copyright (c) 2022 Theo Buehler <tb@openbsd.org> 3 * Copyright (c) 2022 Theo Buehler <tb@openbsd.org>
4 * 4 *
@@ -19,10 +19,10 @@
19#include <stdint.h> 19#include <stdint.h>
20 20
21#include <openssl/bn.h> 21#include <openssl/bn.h>
22#include <openssl/err.h>
23 22
24#include "bn_local.h" 23#include "bn_local.h"
25#include "crypto_internal.h" 24#include "crypto_internal.h"
25#include "err_local.h"
26 26
27/* 27/*
28 * Calculate integer square root of |n| using a variant of Newton's method. 28 * Calculate integer square root of |n| using a variant of Newton's method.
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
index 72b988650c..0326e72c4d 100644
--- a/src/lib/libcrypto/bn/bn_lib.c
+++ b/src/lib/libcrypto/bn/bn_lib.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_lib.c,v 1.93 2024/04/16 13:07:14 jsing Exp $ */ 1/* $OpenBSD: bn_lib.c,v 1.95 2025/12/15 12:09:46 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -63,10 +63,9 @@
63 63
64#include <openssl/opensslconf.h> 64#include <openssl/opensslconf.h>
65 65
66#include <openssl/err.h>
67
68#include "bn_local.h" 66#include "bn_local.h"
69#include "bn_internal.h" 67#include "bn_internal.h"
68#include "err_local.h"
70 69
71BIGNUM * 70BIGNUM *
72BN_new(void) 71BN_new(void)
@@ -350,7 +349,7 @@ BN_ULONG
350BN_get_word(const BIGNUM *a) 349BN_get_word(const BIGNUM *a)
351{ 350{
352 if (a->top > 1) 351 if (a->top > 1)
353 return BN_MASK2; 352 return (BN_ULONG)-1;
354 else if (a->top == 1) 353 else if (a->top == 1)
355 return a->d[0]; 354 return a->d[0];
356 /* a->top == 0 */ 355 /* a->top == 0 */
diff --git a/src/lib/libcrypto/bn/bn_local.h b/src/lib/libcrypto/bn/bn_local.h
index 067ffab3d9..2f5b58a548 100644
--- a/src/lib/libcrypto/bn/bn_local.h
+++ b/src/lib/libcrypto/bn/bn_local.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_local.h,v 1.50 2025/02/13 11:04:20 tb Exp $ */ 1/* $OpenBSD: bn_local.h,v 1.62 2026/01/23 08:29:04 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -116,6 +116,20 @@
116 116
117#include <openssl/bn.h> 117#include <openssl/bn.h>
118 118
119#if BN_BYTES == 8
120#define BN_MASK2 UINT64_C(0xffffffffffffffff)
121#define BN_MASK2l UINT64_C(0xffffffff)
122#define BN_MASK2h UINT64_C(0xffffffff00000000)
123#define BN_BITS 128
124#define BN_BITS4 32
125#else
126#define BN_MASK2 UINT32_C(0xffffffff)
127#define BN_MASK2l UINT32_C(0xffff)
128#define BN_MASK2h UINT32_C(0xffff0000)
129#define BN_BITS 64
130#define BN_BITS4 16
131#endif
132
119__BEGIN_HIDDEN_DECLS 133__BEGIN_HIDDEN_DECLS
120 134
121struct bignum_st { 135struct bignum_st {
@@ -239,12 +253,16 @@ BN_ULONG bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len,
239BN_ULONG bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, 253BN_ULONG bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len,
240 const BN_ULONG *b, int b_len); 254 const BN_ULONG *b, int b_len);
241 255
242void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb); 256void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b);
243void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); 257void bn_mul_comba6(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b);
244void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); 258void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b);
259void bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int a_len, const BN_ULONG *b,
260 int b_len);
245 261
246void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a); 262void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
263void bn_sqr_comba6(BN_ULONG *r, const BN_ULONG *a);
247void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a); 264void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
265void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int a_len);
248 266
249int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, 267int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
250 const BN_ULONG *np, const BN_ULONG *n0, int num); 268 const BN_ULONG *np, const BN_ULONG *n0, int num);
@@ -254,13 +272,8 @@ int bn_expand_bits(BIGNUM *a, size_t bits);
254int bn_expand_bytes(BIGNUM *a, size_t bytes); 272int bn_expand_bytes(BIGNUM *a, size_t bytes);
255int bn_wexpand(BIGNUM *a, int words); 273int bn_wexpand(BIGNUM *a, int words);
256 274
257BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, 275BN_ULONG bn_mulw_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
258 int num); 276BN_ULONG bn_mulw_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
259BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
260 int num);
261BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
262BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
263void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
264BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); 277BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
265void bn_div_rem_words(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q, 278void bn_div_rem_words(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q,
266 BN_ULONG *out_r); 279 BN_ULONG *out_r);
@@ -331,5 +344,11 @@ int bn_printf(BIO *bio, const BIGNUM *bn, int indent, const char *fmt, ...)
331int bn_bn2hex_nosign(const BIGNUM *bn, char **out, size_t *out_len); 344int bn_bn2hex_nosign(const BIGNUM *bn, char **out, size_t *out_len);
332int bn_bn2hex_nibbles(const BIGNUM *bn, char **out, size_t *out_len); 345int bn_bn2hex_nibbles(const BIGNUM *bn, char **out, size_t *out_len);
333 346
347BIGNUM *BN_get_rfc7919_prime_2048(BIGNUM *bn);
348BIGNUM *BN_get_rfc7919_prime_3072(BIGNUM *bn);
349BIGNUM *BN_get_rfc7919_prime_4096(BIGNUM *bn);
350BIGNUM *BN_get_rfc7919_prime_6144(BIGNUM *bn);
351BIGNUM *BN_get_rfc7919_prime_8192(BIGNUM *bn);
352
334__END_HIDDEN_DECLS 353__END_HIDDEN_DECLS
335#endif /* !HEADER_BN_LOCAL_H */ 354#endif /* !HEADER_BN_LOCAL_H */
diff --git a/src/lib/libcrypto/bn/bn_mod.c b/src/lib/libcrypto/bn/bn_mod.c
index 365f6fcf03..7198c02e3b 100644
--- a/src/lib/libcrypto/bn/bn_mod.c
+++ b/src/lib/libcrypto/bn/bn_mod.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_mod.c,v 1.22 2023/07/08 12:21:58 beck Exp $ */ 1/* $OpenBSD: bn_mod.c,v 1.23 2025/05/10 05:54:38 tb Exp $ */
2/* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de> 2/* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * for the OpenSSL project. */ 3 * for the OpenSSL project. */
4/* ==================================================================== 4/* ====================================================================
@@ -111,9 +111,8 @@
111 * [including the GNU Public Licence.] 111 * [including the GNU Public Licence.]
112 */ 112 */
113 113
114#include <openssl/err.h>
115
116#include "bn_local.h" 114#include "bn_local.h"
115#include "err_local.h"
117 116
118int 117int
119BN_mod_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) 118BN_mod_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
diff --git a/src/lib/libcrypto/bn/bn_mod_sqrt.c b/src/lib/libcrypto/bn/bn_mod_sqrt.c
index 280002cc48..fc55f84317 100644
--- a/src/lib/libcrypto/bn/bn_mod_sqrt.c
+++ b/src/lib/libcrypto/bn/bn_mod_sqrt.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_mod_sqrt.c,v 1.3 2023/08/03 18:53:55 tb Exp $ */ 1/* $OpenBSD: bn_mod_sqrt.c,v 1.4 2025/05/10 05:54:38 tb Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2022 Theo Buehler <tb@openbsd.org> 4 * Copyright (c) 2022 Theo Buehler <tb@openbsd.org>
@@ -16,9 +16,8 @@
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */ 17 */
18 18
19#include <openssl/err.h>
20
21#include "bn_local.h" 19#include "bn_local.h"
20#include "err_local.h"
22 21
23/* 22/*
24 * Tonelli-Shanks according to H. Cohen "A Course in Computational Algebraic 23 * Tonelli-Shanks according to H. Cohen "A Course in Computational Algebraic
diff --git a/src/lib/libcrypto/bn/bn_mod_words.c b/src/lib/libcrypto/bn/bn_mod_words.c
new file mode 100644
index 0000000000..f368e074db
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_mod_words.c
@@ -0,0 +1,110 @@
1/* $OpenBSD: bn_mod_words.c,v 1.7 2025/09/07 05:21:29 jsing Exp $ */
2/*
3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include "bn_local.h"
19#include "bn_internal.h"
20
21/*
22 * bn_mod_add_words() computes r[] = (a[] + b[]) mod m[], where a, b, r and
23 * m are arrays of words with length n (r may be the same as a or b).
24 */
25#ifndef HAVE_BN_MOD_ADD_WORDS
26void
27bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
28 const BN_ULONG *m, size_t n)
29{
30 BN_ULONG carry, mask;
31
32 /*
33 * Compute a + b, then compute r - m to determine if r >= m, considering
34 * any carry that resulted from the addition. Finally complete a
35 * conditional subtraction of r - m.
36 */
37 /* XXX - change bn_add_words to use size_t. */
38 carry = bn_add_words(r, a, b, n);
39 mask = ~(carry - bn_sub_words_borrow(r, m, n));
40 bn_sub_words_masked(r, r, m, mask, n);
41}
42#endif
43
44/*
45 * bn_mod_sub_words() computes r[] = (a[] - b[]) mod m[], where a, b, r and
46 * m are arrays of words with length n (r may be the same as a or b).
47 */
48#ifndef HAVE_BN_MOD_SUB_WORDS
49void
50bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
51 const BN_ULONG *m, size_t n)
52{
53 BN_ULONG borrow, mask;
54
55 /*
56 * Compute a - b, then complete a conditional addition of r + m
57 * based on the resulting borrow.
58 */
59 /* XXX - change bn_sub_words to use size_t. */
60 borrow = bn_sub_words(r, a, b, n);
61 mask = (0 - borrow);
62 bn_add_words_masked(r, r, m, mask, n);
63}
64#endif
65
66/*
67 * bn_mod_mul_words() computes r[] = (a[] * b[]) mod m[], where a, b, r and
68 * m are arrays of words with length n (r may be the same as a or b) in the
69 * Montgomery domain. The result remains in the Montgomery domain.
70 */
71#ifndef HAVE_BN_MOD_MUL_WORDS
72void
73bn_mod_mul_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
74 const BN_ULONG *m, BN_ULONG *t, BN_ULONG m0, size_t n)
75{
76 if (n == 4) {
77 bn_mul_comba4(t, a, b);
78 } else if (n == 6) {
79 bn_mul_comba6(t, a, b);
80 } else if (n == 8) {
81 bn_mul_comba8(t, a, b);
82 } else {
83 bn_mul_words(t, a, n, b, n);
84 }
85 bn_montgomery_reduce_words(r, t, m, m0, n);
86}
87#endif
88
89/*
90 * bn_mod_sqr_words() computes r[] = (a[] * a[]) mod m[], where a, r and
91 * m are arrays of words with length n (r may be the same as a) in the
92 * Montgomery domain. The result remains in the Montgomery domain.
93 */
94#ifndef HAVE_BN_MOD_SQR_WORDS
95void
96bn_mod_sqr_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *m,
97 BN_ULONG *t, BN_ULONG m0, size_t n)
98{
99 if (n == 4) {
100 bn_sqr_comba4(t, a);
101 } else if (n == 6) {
102 bn_sqr_comba6(t, a);
103 } else if (n == 8) {
104 bn_sqr_comba8(t, a);
105 } else {
106 bn_sqr_words(t, a, n);
107 }
108 bn_montgomery_reduce_words(r, t, m, m0, n);
109}
110#endif
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
index edd7bcd0c8..c9e95fb08b 100644
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ b/src/lib/libcrypto/bn/bn_mont.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_mont.c,v 1.66 2025/03/09 15:22:40 tb Exp $ */ 1/* $OpenBSD: bn_mont.c,v 1.70 2025/08/30 07:54:27 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -116,6 +116,7 @@
116 * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf 116 * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf
117 */ 117 */
118 118
119#include <limits.h>
119#include <stdio.h> 120#include <stdio.h>
120#include <stdint.h> 121#include <stdint.h>
121#include <string.h> 122#include <string.h>
@@ -214,7 +215,7 @@ BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
214 goto err; 215 goto err;
215 mont->N.neg = 0; 216 mont->N.neg = 0;
216 mont->ri = ((BN_num_bits(mod) + BN_BITS2 - 1) / BN_BITS2) * BN_BITS2; 217 mont->ri = ((BN_num_bits(mod) + BN_BITS2 - 1) / BN_BITS2) * BN_BITS2;
217 if (mont->ri * 2 < mont->ri) 218 if (mont->ri > INT_MAX / 2)
218 goto err; 219 goto err;
219 220
220 /* 221 /*
@@ -316,6 +317,44 @@ BN_MONT_CTX_set_locked(BN_MONT_CTX **pmctx, int lock, const BIGNUM *mod,
316LCRYPTO_ALIAS(BN_MONT_CTX_set_locked); 317LCRYPTO_ALIAS(BN_MONT_CTX_set_locked);
317 318
318/* 319/*
320 * bn_montgomery_reduce_words() performs Montgomery reduction, reducing the input
321 * from its Montgomery form aR to a, returning the result in r. a must be twice
322 * the length of the modulus. Note that the input is mutated in the process of
323 * performing the reduction.
324 */
325void
326bn_montgomery_reduce_words(BN_ULONG *r, BN_ULONG *a, const BN_ULONG *n,
327 BN_ULONG n0, int n_len)
328{
329 BN_ULONG v, mask;
330 BN_ULONG carry = 0;
331 int i;
332
333 /* Add multiples of the modulus, so that it becomes divisible by R. */
334 for (i = 0; i < n_len; i++) {
335 v = bn_mulw_add_words(&a[i], n, n_len, a[i] * n0);
336 bn_addw_addw(v, a[i + n_len], carry, &carry, &a[i + n_len]);
337 }
338
339 /* Divide by R (this is the equivalent of right shifting by n_len). */
340 a = &a[n_len];
341
342 /*
343 * The output is now in the range of [0, 2N). Attempt to reduce once by
344 * subtracting the modulus. If the reduction was necessary then the
345 * result is already in r, otherwise copy the value prior to reduction
346 * from the top half of a.
347 */
348 mask = carry - bn_sub_words(r, a, n, n_len);
349
350 for (i = 0; i < n_len; i++) {
351 *r = (*r & ~mask) | (*a & mask);
352 r++;
353 a++;
354 }
355}
356
357/*
319 * bn_montgomery_reduce() performs Montgomery reduction, reducing the input 358 * bn_montgomery_reduce() performs Montgomery reduction, reducing the input
320 * from its Montgomery form aR to a, returning the result in r. Note that the 359 * from its Montgomery form aR to a, returning the result in r. Note that the
321 * input is mutated in the process of performing the reduction, destroying its 360 * input is mutated in the process of performing the reduction, destroying its
@@ -325,7 +364,6 @@ static int
325bn_montgomery_reduce(BIGNUM *r, BIGNUM *a, BN_MONT_CTX *mctx) 364bn_montgomery_reduce(BIGNUM *r, BIGNUM *a, BN_MONT_CTX *mctx)
326{ 365{
327 BIGNUM *n; 366 BIGNUM *n;
328 BN_ULONG *ap, *rp, n0, v, carry, mask;
329 int i, max, n_len; 367 int i, max, n_len;
330 368
331 n = &mctx->N; 369 n = &mctx->N;
@@ -341,7 +379,8 @@ bn_montgomery_reduce(BIGNUM *r, BIGNUM *a, BN_MONT_CTX *mctx)
341 379
342 /* 380 /*
343 * Expand a to twice the length of the modulus, zero if necessary. 381 * Expand a to twice the length of the modulus, zero if necessary.
344 * XXX - make this a requirement of the caller. 382 * XXX - make this a requirement of the caller or use a temporary
383 * allocation.
345 */ 384 */
346 if ((max = 2 * n_len) < n_len) 385 if ((max = 2 * n_len) < n_len)
347 return 0; 386 return 0;
@@ -350,33 +389,8 @@ bn_montgomery_reduce(BIGNUM *r, BIGNUM *a, BN_MONT_CTX *mctx)
350 for (i = a->top; i < max; i++) 389 for (i = a->top; i < max; i++)
351 a->d[i] = 0; 390 a->d[i] = 0;
352 391
353 carry = 0; 392 bn_montgomery_reduce_words(r->d, a->d, n->d, mctx->n0[0], n_len);
354 n0 = mctx->n0[0];
355 393
356 /* Add multiples of the modulus, so that it becomes divisible by R. */
357 for (i = 0; i < n_len; i++) {
358 v = bn_mul_add_words(&a->d[i], n->d, n_len, a->d[i] * n0);
359 bn_addw_addw(v, a->d[i + n_len], carry, &carry,
360 &a->d[i + n_len]);
361 }
362
363 /* Divide by R (this is the equivalent of right shifting by n_len). */
364 ap = &a->d[n_len];
365
366 /*
367 * The output is now in the range of [0, 2N). Attempt to reduce once by
368 * subtracting the modulus. If the reduction was necessary then the
369 * result is already in r, otherwise copy the value prior to reduction
370 * from the top half of a.
371 */
372 mask = carry - bn_sub_words(r->d, ap, n->d, n_len);
373
374 rp = r->d;
375 for (i = 0; i < n_len; i++) {
376 *rp = (*rp & ~mask) | (*ap & mask);
377 rp++;
378 ap++;
379 }
380 r->top = n_len; 394 r->top = n_len;
381 395
382 bn_correct_top(r); 396 bn_correct_top(r);
@@ -417,7 +431,7 @@ bn_mod_mul_montgomery_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
417 return ret; 431 return ret;
418} 432}
419 433
420static void 434static inline void
421bn_montgomery_multiply_word(const BN_ULONG *ap, BN_ULONG b, const BN_ULONG *np, 435bn_montgomery_multiply_word(const BN_ULONG *ap, BN_ULONG b, const BN_ULONG *np,
422 BN_ULONG *tp, BN_ULONG w, BN_ULONG *carry_a, BN_ULONG *carry_n, int n_len) 436 BN_ULONG *tp, BN_ULONG w, BN_ULONG *carry_a, BN_ULONG *carry_n, int n_len)
423{ 437{
@@ -452,7 +466,7 @@ bn_montgomery_multiply_word(const BN_ULONG *ap, BN_ULONG b, const BN_ULONG *np,
452 * given word arrays. The caller must ensure that rp, ap, bp and np are all 466 * given word arrays. The caller must ensure that rp, ap, bp and np are all
453 * n_len words in length, while tp must be n_len * 2 + 2 words in length. 467 * n_len words in length, while tp must be n_len * 2 + 2 words in length.
454 */ 468 */
455static void 469void
456bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, 470bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
457 const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, int n_len) 471 const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, int n_len)
458{ 472{
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
index bdeb9b0fe8..7db0f61849 100644
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ b/src/lib/libcrypto/bn/bn_mul.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_mul.c,v 1.39 2023/07/08 12:21:58 beck Exp $ */ 1/* $OpenBSD: bn_mul.c,v 1.46 2025/09/01 15:39:59 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -57,6 +57,7 @@
57 */ 57 */
58 58
59#include <assert.h> 59#include <assert.h>
60#include <limits.h>
60#include <stdio.h> 61#include <stdio.h>
61#include <string.h> 62#include <string.h>
62 63
@@ -73,7 +74,7 @@
73 */ 74 */
74#ifndef HAVE_BN_MUL_COMBA4 75#ifndef HAVE_BN_MUL_COMBA4
75void 76void
76bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 77bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
77{ 78{
78 BN_ULONG c0, c1, c2; 79 BN_ULONG c0, c1, c2;
79 80
@@ -103,13 +104,73 @@ bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
103#endif 104#endif
104 105
105/* 106/*
107 * bn_mul_comba6() computes r[] = a[] * b[] using Comba multiplication
108 * (https://everything2.com/title/Comba+multiplication), where a and b are both
109 * six word arrays, producing a 12 word array result.
110 */
111#ifndef HAVE_BN_MUL_COMBA6
112void
113bn_mul_comba6(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
114{
115 BN_ULONG c0, c1, c2;
116
117 bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]);
118
119 bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0);
120 bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]);
121
122 bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0);
123 bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0);
124 bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]);
125
126 bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0);
127 bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0);
128 bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0);
129 bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]);
130
131 bn_mulw_addtw(a[4], b[0], 0, c2, c1, &c2, &c1, &c0);
132 bn_mulw_addtw(a[3], b[1], c2, c1, c0, &c2, &c1, &c0);
133 bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0);
134 bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &c0);
135 bn_mulw_addtw(a[0], b[4], c2, c1, c0, &c2, &c1, &r[4]);
136
137 bn_mulw_addtw(a[0], b[5], 0, c2, c1, &c2, &c1, &c0);
138 bn_mulw_addtw(a[1], b[4], c2, c1, c0, &c2, &c1, &c0);
139 bn_mulw_addtw(a[2], b[3], c2, c1, c0, &c2, &c1, &c0);
140 bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &c0);
141 bn_mulw_addtw(a[4], b[1], c2, c1, c0, &c2, &c1, &c0);
142 bn_mulw_addtw(a[5], b[0], c2, c1, c0, &c2, &c1, &r[5]);
143
144 bn_mulw_addtw(a[5], b[1], 0, c2, c1, &c2, &c1, &c0);
145 bn_mulw_addtw(a[4], b[2], c2, c1, c0, &c2, &c1, &c0);
146 bn_mulw_addtw(a[3], b[3], c2, c1, c0, &c2, &c1, &c0);
147 bn_mulw_addtw(a[2], b[4], c2, c1, c0, &c2, &c1, &c0);
148 bn_mulw_addtw(a[1], b[5], c2, c1, c0, &c2, &c1, &r[6]);
149
150 bn_mulw_addtw(a[2], b[5], 0, c2, c1, &c2, &c1, &c0);
151 bn_mulw_addtw(a[3], b[4], c2, c1, c0, &c2, &c1, &c0);
152 bn_mulw_addtw(a[4], b[3], c2, c1, c0, &c2, &c1, &c0);
153 bn_mulw_addtw(a[5], b[2], c2, c1, c0, &c2, &c1, &r[7]);
154
155 bn_mulw_addtw(a[5], b[3], 0, c2, c1, &c2, &c1, &c0);
156 bn_mulw_addtw(a[4], b[4], c2, c1, c0, &c2, &c1, &c0);
157 bn_mulw_addtw(a[3], b[5], c2, c1, c0, &c2, &c1, &r[8]);
158
159 bn_mulw_addtw(a[4], b[5], 0, c2, c1, &c2, &c1, &c0);
160 bn_mulw_addtw(a[5], b[4], c2, c1, c0, &c2, &c1, &r[9]);
161
162 bn_mulw_addtw(a[5], b[5], 0, c2, c1, &c2, &r[11], &r[10]);
163}
164#endif
165
166/*
106 * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication 167 * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication
107 * (https://everything2.com/title/Comba+multiplication), where a and b are both 168 * (https://everything2.com/title/Comba+multiplication), where a and b are both
108 * eight word arrays, producing a 16 word array result. 169 * eight word arrays, producing a 16 word array result.
109 */ 170 */
110#ifndef HAVE_BN_MUL_COMBA8 171#ifndef HAVE_BN_MUL_COMBA8
111void 172void
112bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 173bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
113{ 174{
114 BN_ULONG c0, c1, c2; 175 BN_ULONG c0, c1, c2;
115 176
@@ -195,14 +256,13 @@ bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
195#endif 256#endif
196 257
197/* 258/*
198 * bn_mul_words() computes (carry:r[i]) = a[i] * w + carry, where a is an array 259 * bn_mulw_words() computes (carry:r[i]) = a[i] * w + carry, where a is an array
199 * of words and w is a single word. This should really be called bn_mulw_words() 260 * of words and w is a single word. This is used as a step in the multiplication
200 * since only one input is an array. This is used as a step in the multiplication
201 * of word arrays. 261 * of word arrays.
202 */ 262 */
203#ifndef HAVE_BN_MUL_WORDS 263#ifndef HAVE_BN_MULW_WORDS
204BN_ULONG 264BN_ULONG
205bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) 265bn_mulw_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
206{ 266{
207 BN_ULONG carry = 0; 267 BN_ULONG carry = 0;
208 268
@@ -228,14 +288,13 @@ bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
228#endif 288#endif
229 289
230/* 290/*
231 * bn_mul_add_words() computes (carry:r[i]) = a[i] * w + r[i] + carry, where 291 * bn_mulw_add_words() computes (carry:r[i]) = a[i] * w + r[i] + carry, where
232 * a is an array of words and w is a single word. This should really be called 292 * a is an array of words and w is a single word. This is used as a step in the
233 * bn_mulw_add_words() since only one input is an array. This is used as a step 293 * multiplication of word arrays.
234 * in the multiplication of word arrays.
235 */ 294 */
236#ifndef HAVE_BN_MUL_ADD_WORDS 295#ifndef HAVE_BN_MULW_ADD_WORDS
237BN_ULONG 296BN_ULONG
238bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) 297bn_mulw_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
239{ 298{
240 BN_ULONG carry = 0; 299 BN_ULONG carry = 0;
241 300
@@ -262,62 +321,60 @@ bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
262} 321}
263#endif 322#endif
264 323
324#ifndef HAVE_BN_MUL_WORDS
265void 325void
266bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) 326bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int a_len, const BN_ULONG *b,
327 int b_len)
267{ 328{
268 BN_ULONG *rr; 329 BN_ULONG *rr;
269 330
270 331 if (a_len < b_len) {
271 if (na < nb) {
272 int itmp; 332 int itmp;
273 BN_ULONG *ltmp; 333 const BN_ULONG *ltmp;
274 334
275 itmp = na; 335 itmp = a_len;
276 na = nb; 336 a_len = b_len;
277 nb = itmp; 337 b_len = itmp;
278 ltmp = a; 338 ltmp = a;
279 a = b; 339 a = b;
280 b = ltmp; 340 b = ltmp;
281 341
282 } 342 }
283 rr = &(r[na]); 343 rr = &(r[a_len]);
284 if (nb <= 0) { 344 if (b_len <= 0) {
285 (void)bn_mul_words(r, a, na, 0); 345 (void)bn_mulw_words(r, a, a_len, 0);
286 return; 346 return;
287 } else 347 } else
288 rr[0] = bn_mul_words(r, a, na, b[0]); 348 rr[0] = bn_mulw_words(r, a, a_len, b[0]);
289 349
290 for (;;) { 350 for (;;) {
291 if (--nb <= 0) 351 if (--b_len <= 0)
292 return; 352 return;
293 rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); 353 rr[1] = bn_mulw_add_words(&(r[1]), a, a_len, b[1]);
294 if (--nb <= 0) 354 if (--b_len <= 0)
295 return; 355 return;
296 rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); 356 rr[2] = bn_mulw_add_words(&(r[2]), a, a_len, b[2]);
297 if (--nb <= 0) 357 if (--b_len <= 0)
298 return; 358 return;
299 rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); 359 rr[3] = bn_mulw_add_words(&(r[3]), a, a_len, b[3]);
300 if (--nb <= 0) 360 if (--b_len <= 0)
301 return; 361 return;
302 rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); 362 rr[4] = bn_mulw_add_words(&(r[4]), a, a_len, b[4]);
303 rr += 4; 363 rr += 4;
304 r += 4; 364 r += 4;
305 b += 4; 365 b += 4;
306 } 366 }
307} 367}
368#endif
308 369
309 370static int
310#ifndef HAVE_BN_MUL
311int
312bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx) 371bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx)
313{ 372{
314 bn_mul_normal(r->d, a->d, a->top, b->d, b->top); 373 bn_mul_words(r->d, a->d, a->top, b->d, b->top);
315 374
316 return 1; 375 return 1;
317} 376}
318 377
319#endif /* HAVE_BN_MUL */
320
321int 378int
322BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 379BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
323{ 380{
@@ -338,14 +395,16 @@ BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
338 if (rr == NULL) 395 if (rr == NULL)
339 goto err; 396 goto err;
340 397
341 rn = a->top + b->top; 398 if (a->top > INT_MAX - b->top)
342 if (rn < a->top)
343 goto err; 399 goto err;
400 rn = a->top + b->top;
344 if (!bn_wexpand(rr, rn)) 401 if (!bn_wexpand(rr, rn))
345 goto err; 402 goto err;
346 403
347 if (a->top == 4 && b->top == 4) { 404 if (a->top == 4 && b->top == 4) {
348 bn_mul_comba4(rr->d, a->d, b->d); 405 bn_mul_comba4(rr->d, a->d, b->d);
406 } else if (a->top == 6 && b->top == 6) {
407 bn_mul_comba6(rr->d, a->d, b->d);
349 } else if (a->top == 8 && b->top == 8) { 408 } else if (a->top == 8 && b->top == 8) {
350 bn_mul_comba8(rr->d, a->d, b->d); 409 bn_mul_comba8(rr->d, a->d, b->d);
351 } else { 410 } else {
diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c
index 5a4aa50bf1..3d7f18a8ea 100644
--- a/src/lib/libcrypto/bn/bn_prime.c
+++ b/src/lib/libcrypto/bn/bn_prime.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_prime.c,v 1.34 2023/07/20 06:26:27 tb Exp $ */ 1/* $OpenBSD: bn_prime.c,v 1.37 2025/11/08 16:27:33 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -109,12 +109,12 @@
109 * 109 *
110 */ 110 */
111 111
112#include <stdio.h> 112#include <stddef.h>
113#include <time.h>
114 113
115#include <openssl/err.h> 114#include <openssl/bn.h>
116 115
117#include "bn_local.h" 116#include "bn_local.h"
117#include "err_local.h"
118 118
119/* The quick sieve algorithm approach to weeding out primes is 119/* The quick sieve algorithm approach to weeding out primes is
120 * Philip Zimmermann's, as implemented in PGP. I have had a read of 120 * Philip Zimmermann's, as implemented in PGP. I have had a read of
@@ -339,7 +339,7 @@ probable_prime_dh(BIGNUM *rnd, int bits, const BIGNUM *add, const BIGNUM *rem,
339loop: 339loop:
340 for (i = 1; i < NUMPRIMES; i++) { 340 for (i = 1; i < NUMPRIMES; i++) {
341 /* check that rnd is a prime */ 341 /* check that rnd is a prime */
342 BN_LONG mod = BN_mod_word(rnd, primes[i]); 342 BN_ULONG mod = BN_mod_word(rnd, primes[i]);
343 if (mod == (BN_ULONG)-1) 343 if (mod == (BN_ULONG)-1)
344 goto err; 344 goto err;
345 if (mod <= 1) { 345 if (mod <= 1) {
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c
index 9cfcd8e2c0..d3b16f70a0 100644
--- a/src/lib/libcrypto/bn/bn_rand.c
+++ b/src/lib/libcrypto/bn/bn_rand.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_rand.c,v 1.30 2024/03/16 20:42:33 tb Exp $ */ 1/* $OpenBSD: bn_rand.c,v 1.31 2025/05/10 05:54:38 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -115,9 +115,8 @@
115#include <string.h> 115#include <string.h>
116#include <time.h> 116#include <time.h>
117 117
118#include <openssl/err.h>
119
120#include "bn_local.h" 118#include "bn_local.h"
119#include "err_local.h"
121 120
122static int 121static int
123bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) 122bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c
index e3f22c52a9..ed5049b772 100644
--- a/src/lib/libcrypto/bn/bn_recp.c
+++ b/src/lib/libcrypto/bn/bn_recp.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_recp.c,v 1.33 2025/02/04 20:22:20 tb Exp $ */ 1/* $OpenBSD: bn_recp.c,v 1.34 2025/05/10 05:54:38 tb Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -58,9 +58,8 @@
58 58
59#include <stdio.h> 59#include <stdio.h>
60 60
61#include <openssl/err.h>
62
63#include "bn_local.h" 61#include "bn_local.h"
62#include "err_local.h"
64 63
65struct bn_recp_ctx_st { 64struct bn_recp_ctx_st {
66 BIGNUM *N; /* the divisor */ 65 BIGNUM *N; /* the divisor */
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c
index 12edc7c0a0..b9f73cc322 100644
--- a/src/lib/libcrypto/bn/bn_shift.c
+++ b/src/lib/libcrypto/bn/bn_shift.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_shift.c,v 1.22 2023/07/08 12:21:58 beck Exp $ */ 1/* $OpenBSD: bn_shift.c,v 1.23 2025/05/10 05:54:38 tb Exp $ */
2/* 2/*
3 * Copyright (c) 2022, 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2022, 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -16,9 +16,9 @@
16 */ 16 */
17 17
18#include <openssl/bn.h> 18#include <openssl/bn.h>
19#include <openssl/err.h>
20 19
21#include "bn_local.h" 20#include "bn_local.h"
21#include "err_local.h"
22 22
23static inline int 23static inline int
24bn_lshift(BIGNUM *r, const BIGNUM *a, int n) 24bn_lshift(BIGNUM *r, const BIGNUM *a, int n)
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
index 0dbccbf85d..27e08bdf13 100644
--- a/src/lib/libcrypto/bn/bn_sqr.c
+++ b/src/lib/libcrypto/bn/bn_sqr.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_sqr.c,v 1.36 2023/07/08 12:21:58 beck Exp $ */ 1/* $OpenBSD: bn_sqr.c,v 1.42 2025/09/07 05:21:29 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -64,8 +64,6 @@
64#include "bn_local.h" 64#include "bn_local.h"
65#include "bn_internal.h" 65#include "bn_internal.h"
66 66
67int bn_sqr(BIGNUM *r, const BIGNUM *a, int max, BN_CTX *ctx);
68
69/* 67/*
70 * bn_sqr_comba4() computes r[] = a[] * a[] using Comba multiplication 68 * bn_sqr_comba4() computes r[] = a[] * a[] using Comba multiplication
71 * (https://everything2.com/title/Comba+multiplication), where a is a 69 * (https://everything2.com/title/Comba+multiplication), where a is a
@@ -97,6 +95,51 @@ bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
97#endif 95#endif
98 96
99/* 97/*
98 * bn_sqr_comba6() computes r[] = a[] * a[] using Comba multiplication
99 * (https://everything2.com/title/Comba+multiplication), where a is a
100 * six word array, producing a 12 word array result.
101 */
102#ifndef HAVE_BN_SQR_COMBA6
103void
104bn_sqr_comba6(BN_ULONG *r, const BN_ULONG *a)
105{
106 BN_ULONG c2, c1, c0;
107
108 bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]);
109
110 bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]);
111
112 bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0);
113 bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]);
114
115 bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0);
116 bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]);
117
118 bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0);
119 bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &c0);
120 bn_mul2_mulw_addtw(a[4], a[0], c2, c1, c0, &c2, &c1, &r[4]);
121
122 bn_mul2_mulw_addtw(a[5], a[0], 0, c2, c1, &c2, &c1, &c0);
123 bn_mul2_mulw_addtw(a[4], a[1], c2, c1, c0, &c2, &c1, &c0);
124 bn_mul2_mulw_addtw(a[3], a[2], c2, c1, c0, &c2, &c1, &r[5]);
125
126 bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &c1, &c0);
127 bn_mul2_mulw_addtw(a[4], a[2], c2, c1, c0, &c2, &c1, &c0);
128 bn_mul2_mulw_addtw(a[5], a[1], c2, c1, c0, &c2, &c1, &r[6]);
129
130 bn_mul2_mulw_addtw(a[5], a[2], 0, c2, c1, &c2, &c1, &c0);
131 bn_mul2_mulw_addtw(a[4], a[3], c2, c1, c0, &c2, &c1, &r[7]);
132
133 bn_mulw_addtw(a[4], a[4], 0, c2, c1, &c2, &c1, &c0);
134 bn_mul2_mulw_addtw(a[5], a[3], c2, c1, c0, &c2, &c1, &r[8]);
135
136 bn_mul2_mulw_addtw(a[5], a[4], 0, c2, c1, &c2, &c1, &r[9]);
137
138 bn_mulw_addtw(a[5], a[5], 0, c2, c1, &c2, &r[11], &r[10]);
139}
140#endif
141
142/*
100 * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication 143 * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication
101 * (https://everything2.com/title/Comba+multiplication), where a is an 144 * (https://everything2.com/title/Comba+multiplication), where a is an
102 * eight word array, producing a 16 word array result. 145 * eight word array, producing a 16 word array result.
@@ -160,7 +203,7 @@ bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
160} 203}
161#endif 204#endif
162 205
163#ifndef HAVE_BN_SQR 206#ifndef HAVE_BN_SQR_WORDS
164/* 207/*
165 * bn_sqr_add_words() computes (r[i*2+1]:r[i*2]) = (r[i*2+1]:r[i*2]) + a[i] * a[i]. 208 * bn_sqr_add_words() computes (r[i*2+1]:r[i*2]) = (r[i*2+1]:r[i*2]) + a[i] * a[i].
166 */ 209 */
@@ -197,12 +240,16 @@ bn_sqr_add_words(BN_ULONG *r, const BN_ULONG *a, int n)
197 } 240 }
198} 241}
199 242
200static void 243/*
201bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len) 244 * bn_sqr_words() computes r[] = a[] * a[].
245 */
246void
247bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int a_len)
202{ 248{
203 const BN_ULONG *ap; 249 const BN_ULONG *ap;
204 BN_ULONG *rp; 250 BN_ULONG *rp;
205 BN_ULONG w; 251 BN_ULONG w;
252 int r_len;
206 int n; 253 int n;
207 254
208 if (a_len <= 0) 255 if (a_len <= 0)
@@ -213,13 +260,14 @@ bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len)
213 ap++; 260 ap++;
214 261
215 rp = r; 262 rp = r;
263 r_len = a_len * 2;
216 rp[0] = rp[r_len - 1] = 0; 264 rp[0] = rp[r_len - 1] = 0;
217 rp++; 265 rp++;
218 266
219 /* Compute initial product - r[n:1] = a[n:1] * a[0] */ 267 /* Compute initial product - r[n:1] = a[n:1] * a[0] */
220 n = a_len - 1; 268 n = a_len - 1;
221 if (n > 0) { 269 if (n > 0) {
222 rp[n] = bn_mul_words(rp, ap, n, w); 270 rp[n] = bn_mulw_words(rp, ap, n, w);
223 } 271 }
224 rp += 2; 272 rp += 2;
225 n--; 273 n--;
@@ -229,7 +277,7 @@ bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len)
229 w = ap[0]; 277 w = ap[0];
230 ap++; 278 ap++;
231 279
232 rp[n] = bn_mul_add_words(rp, ap, n, w); 280 rp[n] = bn_mulw_add_words(rp, ap, n, w);
233 rp += 2; 281 rp += 2;
234 n--; 282 n--;
235 } 283 }
@@ -240,20 +288,20 @@ bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len)
240 /* Add squares. */ 288 /* Add squares. */
241 bn_sqr_add_words(r, a, a_len); 289 bn_sqr_add_words(r, a, a_len);
242} 290}
291#endif
243 292
244/* 293/*
245 * bn_sqr() computes a * a, storing the result in r. The caller must ensure that 294 * bn_sqr() computes a * a, storing the result in r. The caller must ensure that
246 * r is not the same BIGNUM as a and that r has been expanded to rn = a->top * 2 295 * r is not the same BIGNUM as a and that r has been expanded to rn = a->top * 2
247 * words. 296 * words.
248 */ 297 */
249int 298static int
250bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) 299bn_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
251{ 300{
252 bn_sqr_normal(r->d, r_len, a->d, a->top); 301 bn_sqr_words(r->d, a->d, a->top);
253 302
254 return 1; 303 return 1;
255} 304}
256#endif
257 305
258int 306int
259BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) 307BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
@@ -281,10 +329,12 @@ BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
281 329
282 if (a->top == 4) { 330 if (a->top == 4) {
283 bn_sqr_comba4(rr->d, a->d); 331 bn_sqr_comba4(rr->d, a->d);
332 } else if (a->top == 6) {
333 bn_sqr_comba6(rr->d, a->d);
284 } else if (a->top == 8) { 334 } else if (a->top == 8) {
285 bn_sqr_comba8(rr->d, a->d); 335 bn_sqr_comba8(rr->d, a->d);
286 } else { 336 } else {
287 if (!bn_sqr(rr, a, r_len, ctx)) 337 if (!bn_sqr(rr, a, ctx))
288 goto err; 338 goto err;
289 } 339 }
290 340
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c
index a82b911e67..e035878cb9 100644
--- a/src/lib/libcrypto/bn/bn_word.c
+++ b/src/lib/libcrypto/bn/bn_word.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_word.c,v 1.21 2023/07/08 12:21:58 beck Exp $ */ 1/* $OpenBSD: bn_word.c,v 1.22 2025/08/30 07:54:27 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -232,7 +232,7 @@ BN_mul_word(BIGNUM *a, BN_ULONG w)
232 if (w == 0) 232 if (w == 0)
233 BN_zero(a); 233 BN_zero(a);
234 else { 234 else {
235 ll = bn_mul_words(a->d, a->d, a->top, w); 235 ll = bn_mulw_words(a->d, a->d, a->top, w);
236 if (ll) { 236 if (ll) {
237 if (!bn_wexpand(a, a->top + 1)) 237 if (!bn_wexpand(a, a->top + 1))
238 return (0); 238 return (0);
diff --git a/src/lib/libcrypto/bn/s2n_bignum.h b/src/lib/libcrypto/bn/s2n_bignum.h
index ce6e8cdc94..7d77894cdc 100644
--- a/src/lib/libcrypto/bn/s2n_bignum.h
+++ b/src/lib/libcrypto/bn/s2n_bignum.h
@@ -1,3 +1,5 @@
1// $OpenBSD: s2n_bignum.h,v 1.4 2025/08/12 10:01:37 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -34,182 +36,240 @@
34// throughput, generally offering higher performance there. 36// throughput, generally offering higher performance there.
35// ---------------------------------------------------------------------------- 37// ----------------------------------------------------------------------------
36 38
39
40#if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__)
41#define S2N_BIGNUM_STATIC
42#else
43#define S2N_BIGNUM_STATIC static
44#endif
45
37// Add, z := x + y 46// Add, z := x + y
38// Inputs x[m], y[n]; outputs function return (carry-out) and z[p] 47// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
39extern uint64_t bignum_add (uint64_t p, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); 48extern uint64_t bignum_add (uint64_t p, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y);
40 49
41// Add modulo p_25519, z := (x + y) mod p_25519, assuming x and y reduced 50// Add modulo p_25519, z := (x + y) mod p_25519, assuming x and y reduced
42// Inputs x[4], y[4]; output z[4] 51// Inputs x[4], y[4]; output z[4]
43extern void bignum_add_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 52extern void bignum_add_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
44 53
45// Add modulo p_256, z := (x + y) mod p_256, assuming x and y reduced 54// Add modulo p_256, z := (x + y) mod p_256, assuming x and y reduced
46// Inputs x[4], y[4]; output z[4] 55// Inputs x[4], y[4]; output z[4]
47extern void bignum_add_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 56extern void bignum_add_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
48 57
49// Add modulo p_256k1, z := (x + y) mod p_256k1, assuming x and y reduced 58// Add modulo p_256k1, z := (x + y) mod p_256k1, assuming x and y reduced
50// Inputs x[4], y[4]; output z[4] 59// Inputs x[4], y[4]; output z[4]
51extern void bignum_add_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 60extern void bignum_add_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
52 61
53// Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced 62// Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced
54// Inputs x[6], y[6]; output z[6] 63// Inputs x[6], y[6]; output z[6]
55extern void bignum_add_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); 64extern void bignum_add_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]);
56 65
57// Add modulo p_521, z := (x + y) mod p_521, assuming x and y reduced 66// Add modulo p_521, z := (x + y) mod p_521, assuming x and y reduced
58// Inputs x[9], y[9]; output z[9] 67// Inputs x[9], y[9]; output z[9]
59extern void bignum_add_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); 68extern void bignum_add_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]);
69
70// Add modulo p_sm2, z := (x + y) mod p_sm2, assuming x and y reduced
71// Inputs x[4], y[4]; output z[4]
72extern void bignum_add_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
60 73
61// Compute "amontification" constant z :== 2^{128k} (congruent mod m) 74// Compute "amontification" constant z :== 2^{128k} (congruent mod m)
62// Input m[k]; output z[k]; temporary buffer t[>=k] 75// Input m[k]; output z[k]; temporary buffer t[>=k]
63extern void bignum_amontifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t); 76extern void bignum_amontifier (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t *t);
64 77
65// Almost-Montgomery multiply, z :== (x * y / 2^{64k}) (congruent mod m) 78// Almost-Montgomery multiply, z :== (x * y / 2^{64k}) (congruent mod m)
66// Inputs x[k], y[k], m[k]; output z[k] 79// Inputs x[k], y[k], m[k]; output z[k]
67extern void bignum_amontmul (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); 80extern void bignum_amontmul (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m);
68 81
69// Almost-Montgomery reduce, z :== (x' / 2^{64p}) (congruent mod m) 82// Almost-Montgomery reduce, z :== (x' / 2^{64p}) (congruent mod m)
70// Inputs x[n], m[k], p; output z[k] 83// Inputs x[n], m[k], p; output z[k]
71extern void bignum_amontredc (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t *m, uint64_t p); 84extern void bignum_amontredc (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, const uint64_t *m, uint64_t p);
72 85
73// Almost-Montgomery square, z :== (x^2 / 2^{64k}) (congruent mod m) 86// Almost-Montgomery square, z :== (x^2 / 2^{64k}) (congruent mod m)
74// Inputs x[k], m[k]; output z[k] 87// Inputs x[k], m[k]; output z[k]
75extern void bignum_amontsqr (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); 88extern void bignum_amontsqr (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m);
76 89
77// Convert 4-digit (256-bit) bignum to/from big-endian form 90// Convert 4-digit (256-bit) bignum to/from big-endian form
78// Input x[4]; output z[4] 91// Input x[4]; output z[4]
79extern void bignum_bigendian_4 (uint64_t z[static 4], uint64_t x[static 4]); 92extern void bignum_bigendian_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
80 93
81// Convert 6-digit (384-bit) bignum to/from big-endian form 94// Convert 6-digit (384-bit) bignum to/from big-endian form
82// Input x[6]; output z[6] 95// Input x[6]; output z[6]
83extern void bignum_bigendian_6 (uint64_t z[static 6], uint64_t x[static 6]); 96extern void bignum_bigendian_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
84 97
85// Select bitfield starting at bit n with length l <= 64 98// Select bitfield starting at bit n with length l <= 64
86// Inputs x[k], n, l; output function return 99// Inputs x[k], n, l; output function return
87extern uint64_t bignum_bitfield (uint64_t k, uint64_t *x, uint64_t n, uint64_t l); 100extern uint64_t bignum_bitfield (uint64_t k, const uint64_t *x, uint64_t n, uint64_t l);
88 101
89// Return size of bignum in bits 102// Return size of bignum in bits
90// Input x[k]; output function return 103// Input x[k]; output function return
91extern uint64_t bignum_bitsize (uint64_t k, uint64_t *x); 104extern uint64_t bignum_bitsize (uint64_t k, const uint64_t *x);
92 105
93// Divide by a single (nonzero) word, z := x / m and return x mod m 106// Divide by a single (nonzero) word, z := x / m and return x mod m
94// Inputs x[n], m; outputs function return (remainder) and z[k] 107// Inputs x[n], m; outputs function return (remainder) and z[k]
95extern uint64_t bignum_cdiv (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t m); 108extern uint64_t bignum_cdiv (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t m);
96 109
97// Divide by a single word, z := x / m when known to be exact 110// Divide by a single word, z := x / m when known to be exact
98// Inputs x[n], m; output z[k] 111// Inputs x[n], m; output z[k]
99extern void bignum_cdiv_exact (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t m); 112extern void bignum_cdiv_exact (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t m);
100 113
101// Count leading zero digits (64-bit words) 114// Count leading zero digits (64-bit words)
102// Input x[k]; output function return 115// Input x[k]; output function return
103extern uint64_t bignum_cld (uint64_t k, uint64_t *x); 116extern uint64_t bignum_cld (uint64_t k, const uint64_t *x);
104 117
105// Count leading zero bits 118// Count leading zero bits
106// Input x[k]; output function return 119// Input x[k]; output function return
107extern uint64_t bignum_clz (uint64_t k, uint64_t *x); 120extern uint64_t bignum_clz (uint64_t k, const uint64_t *x);
108 121
109// Multiply-add with single-word multiplier, z := z + c * y 122// Multiply-add with single-word multiplier, z := z + c * y
110// Inputs c, y[n]; outputs function return (carry-out) and z[k] 123// Inputs c, y[n]; outputs function return (carry-out) and z[k]
111extern uint64_t bignum_cmadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); 124extern uint64_t bignum_cmadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, const uint64_t *y);
112 125
113// Negated multiply-add with single-word multiplier, z := z - c * y 126// Negated multiply-add with single-word multiplier, z := z - c * y
114// Inputs c, y[n]; outputs function return (negative carry-out) and z[k] 127// Inputs c, y[n]; outputs function return (negative carry-out) and z[k]
115extern uint64_t bignum_cmnegadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); 128extern uint64_t bignum_cmnegadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, const uint64_t *y);
116 129
117// Find modulus of bignum w.r.t. single nonzero word m, returning x mod m 130// Find modulus of bignum w.r.t. single nonzero word m, returning x mod m
118// Input x[k], m; output function return 131// Input x[k], m; output function return
119extern uint64_t bignum_cmod (uint64_t k, uint64_t *x, uint64_t m); 132extern uint64_t bignum_cmod (uint64_t k, const uint64_t *x, uint64_t m);
120 133
121// Multiply by a single word, z := c * y 134// Multiply by a single word, z := c * y
122// Inputs c, y[n]; outputs function return (carry-out) and z[k] 135// Inputs c, y[n]; outputs function return (carry-out) and z[k]
123extern uint64_t bignum_cmul (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); 136extern uint64_t bignum_cmul (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, const uint64_t *y);
124 137
125// Multiply by a single word modulo p_25519, z := (c * x) mod p_25519, assuming x reduced 138// Multiply by a single word modulo p_25519, z := (c * x) mod p_25519, assuming x reduced
126// Inputs c, x[4]; output z[4] 139// Inputs c, x[4]; output z[4]
127extern void bignum_cmul_p25519 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); 140extern void bignum_cmul_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]);
128extern void bignum_cmul_p25519_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); 141extern void bignum_cmul_p25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]);
129 142
130// Multiply by a single word modulo p_256, z := (c * x) mod p_256, assuming x reduced 143// Multiply by a single word modulo p_256, z := (c * x) mod p_256, assuming x reduced
131// Inputs c, x[4]; output z[4] 144// Inputs c, x[4]; output z[4]
132extern void bignum_cmul_p256 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); 145extern void bignum_cmul_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]);
133extern void bignum_cmul_p256_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); 146extern void bignum_cmul_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]);
134 147
135// Multiply by a single word modulo p_256k1, z := (c * x) mod p_256k1, assuming x reduced 148// Multiply by a single word modulo p_256k1, z := (c * x) mod p_256k1, assuming x reduced
136// Inputs c, x[4]; output z[4] 149// Inputs c, x[4]; output z[4]
137extern void bignum_cmul_p256k1 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); 150extern void bignum_cmul_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]);
138extern void bignum_cmul_p256k1_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); 151extern void bignum_cmul_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]);
139 152
140// Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming x reduced 153// Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming x reduced
141// Inputs c, x[6]; output z[6] 154// Inputs c, x[6]; output z[6]
142extern void bignum_cmul_p384 (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]); 155extern void bignum_cmul_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 6]);
143extern void bignum_cmul_p384_alt (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]); 156extern void bignum_cmul_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 6]);
144 157
145// Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming x reduced 158// Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming x reduced
146// Inputs c, x[9]; output z[9] 159// Inputs c, x[9]; output z[9]
147extern void bignum_cmul_p521 (uint64_t z[static 9], uint64_t c, uint64_t x[static 9]); 160extern void bignum_cmul_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 9]);
148extern void bignum_cmul_p521_alt (uint64_t z[static 9], uint64_t c, uint64_t x[static 9]); 161extern void bignum_cmul_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 9]);
162
163// Multiply by a single word modulo p_sm2, z := (c * x) mod p_sm2, assuming x reduced
164// Inputs c, x[4]; output z[4]
165extern void bignum_cmul_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]);
166extern void bignum_cmul_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]);
149 167
150// Test bignums for coprimality, gcd(x,y) = 1 168// Test bignums for coprimality, gcd(x,y) = 1
151// Inputs x[m], y[n]; output function return; temporary buffer t[>=2*max(m,n)] 169// Inputs x[m], y[n]; output function return; temporary buffer t[>=2*max(m,n)]
152extern uint64_t bignum_coprime (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y, uint64_t *t); 170extern uint64_t bignum_coprime (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y, uint64_t *t);
153 171
154// Copy bignum with zero-extension or truncation, z := x 172// Copy bignum with zero-extension or truncation, z := x
155// Input x[n]; output z[k] 173// Input x[n]; output z[k]
156extern void bignum_copy (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); 174extern void bignum_copy (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x);
175
176// Given table: uint64_t[height*width], copy table[idx*width...(idx+1)*width-1]
177// into z[0..width-1].
178// This function is constant-time with respect to the value of `idx`. This is
179// achieved by reading the whole table and using the bit-masking to get the
180// `idx`-th row.
181// Input table[height*width]; output z[width]
182extern void bignum_copy_row_from_table (uint64_t *z, const uint64_t *table, uint64_t height,
183 uint64_t width, uint64_t idx);
184
185// Given table: uint64_t[height*width], copy table[idx*width...(idx+1)*width-1]
186// into z[0..width-1]. width must be a multiple of 8.
187// This function is constant-time with respect to the value of `idx`. This is
188// achieved by reading the whole table and using the bit-masking to get the
189// `idx`-th row.
190// Input table[height*width]; output z[width]
191extern void bignum_copy_row_from_table_8n (uint64_t *z, const uint64_t *table,
192 uint64_t height, uint64_t width, uint64_t idx);
193
194// Given table: uint64_t[height*16], copy table[idx*16...(idx+1)*16-1] into z[0..15].
195// This function is constant-time with respect to the value of `idx`. This is
196// achieved by reading the whole table and using the bit-masking to get the
197// `idx`-th row.
198// Input table[height*16]; output z[16]
199extern void bignum_copy_row_from_table_16 (uint64_t *z, const uint64_t *table,
200 uint64_t height, uint64_t idx);
201
202// Given table: uint64_t[height*32], copy table[idx*32...(idx+1)*32-1] into z[0..31].
203// This function is constant-time with respect to the value of `idx`. This is
204// achieved by reading the whole table and using the bit-masking to get the
205// `idx`-th row.
206// Input table[height*32]; output z[32]
207extern void bignum_copy_row_from_table_32 (uint64_t *z, const uint64_t *table,
208 uint64_t height, uint64_t idx);
157 209
158// Count trailing zero digits (64-bit words) 210// Count trailing zero digits (64-bit words)
159// Input x[k]; output function return 211// Input x[k]; output function return
160extern uint64_t bignum_ctd (uint64_t k, uint64_t *x); 212extern uint64_t bignum_ctd (uint64_t k, const uint64_t *x);
161 213
162// Count trailing zero bits 214// Count trailing zero bits
163// Input x[k]; output function return 215// Input x[k]; output function return
164extern uint64_t bignum_ctz (uint64_t k, uint64_t *x); 216extern uint64_t bignum_ctz (uint64_t k, const uint64_t *x);
165 217
166// Convert from almost-Montgomery form, z := (x / 2^256) mod p_256 218// Convert from almost-Montgomery form, z := (x / 2^256) mod p_256
167// Input x[4]; output z[4] 219// Input x[4]; output z[4]
168extern void bignum_deamont_p256 (uint64_t z[static 4], uint64_t x[static 4]); 220extern void bignum_deamont_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
169extern void bignum_deamont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); 221extern void bignum_deamont_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
170 222
171// Convert from almost-Montgomery form, z := (x / 2^256) mod p_256k1 223// Convert from almost-Montgomery form, z := (x / 2^256) mod p_256k1
172// Input x[4]; output z[4] 224// Input x[4]; output z[4]
173extern void bignum_deamont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); 225extern void bignum_deamont_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
174 226
175// Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 227// Convert from almost-Montgomery form, z := (x / 2^384) mod p_384
176// Input x[6]; output z[6] 228// Input x[6]; output z[6]
177extern void bignum_deamont_p384 (uint64_t z[static 6], uint64_t x[static 6]); 229extern void bignum_deamont_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
178extern void bignum_deamont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); 230extern void bignum_deamont_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
179 231
180// Convert from almost-Montgomery form z := (x / 2^576) mod p_521 232// Convert from almost-Montgomery form z := (x / 2^576) mod p_521
181// Input x[9]; output z[9] 233// Input x[9]; output z[9]
182extern void bignum_deamont_p521 (uint64_t z[static 9], uint64_t x[static 9]); 234extern void bignum_deamont_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
235
236// Convert from almost-Montgomery form z := (x / 2^256) mod p_sm2
237// Input x[4]; output z[4]
238extern void bignum_deamont_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
183 239
184// Convert from (almost-)Montgomery form z := (x / 2^{64k}) mod m 240// Convert from (almost-)Montgomery form z := (x / 2^{64k}) mod m
185// Inputs x[k], m[k]; output z[k] 241// Inputs x[k], m[k]; output z[k]
186extern void bignum_demont (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); 242extern void bignum_demont (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m);
187 243
188// Convert from Montgomery form z := (x / 2^256) mod p_256, assuming x reduced 244// Convert from Montgomery form z := (x / 2^256) mod p_256, assuming x reduced
189// Input x[4]; output z[4] 245// Input x[4]; output z[4]
190extern void bignum_demont_p256 (uint64_t z[static 4], uint64_t x[static 4]); 246extern void bignum_demont_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
191extern void bignum_demont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); 247extern void bignum_demont_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
192 248
193// Convert from Montgomery form z := (x / 2^256) mod p_256k1, assuming x reduced 249// Convert from Montgomery form z := (x / 2^256) mod p_256k1, assuming x reduced
194// Input x[4]; output z[4] 250// Input x[4]; output z[4]
195extern void bignum_demont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); 251extern void bignum_demont_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
196 252
197// Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced 253// Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced
198// Input x[6]; output z[6] 254// Input x[6]; output z[6]
199extern void bignum_demont_p384 (uint64_t z[static 6], uint64_t x[static 6]); 255extern void bignum_demont_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
200extern void bignum_demont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); 256extern void bignum_demont_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
201 257
202// Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced 258// Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced
203// Input x[9]; output z[9] 259// Input x[9]; output z[9]
204extern void bignum_demont_p521 (uint64_t z[static 9], uint64_t x[static 9]); 260extern void bignum_demont_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
261
262// Convert from Montgomery form z := (x / 2^256) mod p_sm2, assuming x reduced
263// Input x[4]; output z[4]
264extern void bignum_demont_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
205 265
206// Select digit x[n] 266// Select digit x[n]
207// Inputs x[k], n; output function return 267// Inputs x[k], n; output function return
208extern uint64_t bignum_digit (uint64_t k, uint64_t *x, uint64_t n); 268extern uint64_t bignum_digit (uint64_t k, const uint64_t *x, uint64_t n);
209 269
210// Return size of bignum in digits (64-bit word) 270// Return size of bignum in digits (64-bit word)
211// Input x[k]; output function return 271// Input x[k]; output function return
212extern uint64_t bignum_digitsize (uint64_t k, uint64_t *x); 272extern uint64_t bignum_digitsize (uint64_t k, const uint64_t *x);
213 273
214// Divide bignum by 10: z' := z div 10, returning remainder z mod 10 274// Divide bignum by 10: z' := z div 10, returning remainder z mod 10
215// Inputs z[k]; outputs function return (remainder) and z[k] 275// Inputs z[k]; outputs function return (remainder) and z[k]
@@ -217,294 +277,391 @@ extern uint64_t bignum_divmod10 (uint64_t k, uint64_t *z);
217 277
218// Double modulo p_25519, z := (2 * x) mod p_25519, assuming x reduced 278// Double modulo p_25519, z := (2 * x) mod p_25519, assuming x reduced
219// Input x[4]; output z[4] 279// Input x[4]; output z[4]
220extern void bignum_double_p25519 (uint64_t z[static 4], uint64_t x[static 4]); 280extern void bignum_double_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
221 281
222// Double modulo p_256, z := (2 * x) mod p_256, assuming x reduced 282// Double modulo p_256, z := (2 * x) mod p_256, assuming x reduced
223// Input x[4]; output z[4] 283// Input x[4]; output z[4]
224extern void bignum_double_p256 (uint64_t z[static 4], uint64_t x[static 4]); 284extern void bignum_double_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
225 285
226// Double modulo p_256k1, z := (2 * x) mod p_256k1, assuming x reduced 286// Double modulo p_256k1, z := (2 * x) mod p_256k1, assuming x reduced
227// Input x[4]; output z[4] 287// Input x[4]; output z[4]
228extern void bignum_double_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); 288extern void bignum_double_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
229 289
230// Double modulo p_384, z := (2 * x) mod p_384, assuming x reduced 290// Double modulo p_384, z := (2 * x) mod p_384, assuming x reduced
231// Input x[6]; output z[6] 291// Input x[6]; output z[6]
232extern void bignum_double_p384 (uint64_t z[static 6], uint64_t x[static 6]); 292extern void bignum_double_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
233 293
234// Double modulo p_521, z := (2 * x) mod p_521, assuming x reduced 294// Double modulo p_521, z := (2 * x) mod p_521, assuming x reduced
235// Input x[9]; output z[9] 295// Input x[9]; output z[9]
236extern void bignum_double_p521 (uint64_t z[static 9], uint64_t x[static 9]); 296extern void bignum_double_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
297
298// Double modulo p_sm2, z := (2 * x) mod p_sm2, assuming x reduced
299// Input x[4]; output z[4]
300extern void bignum_double_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
237 301
238// Extended Montgomery reduce, returning results in input-output buffer 302// Extended Montgomery reduce, returning results in input-output buffer
239// Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] 303// Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k]
240extern uint64_t bignum_emontredc (uint64_t k, uint64_t *z, uint64_t *m, uint64_t w); 304extern uint64_t bignum_emontredc (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t w);
241 305
242// Extended Montgomery reduce in 8-digit blocks, results in input-output buffer 306// Extended Montgomery reduce in 8-digit blocks, results in input-output buffer
243// Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] 307// Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k]
244extern uint64_t bignum_emontredc_8n (uint64_t k, uint64_t *z, uint64_t *m, uint64_t w); 308extern uint64_t bignum_emontredc_8n (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t w);
309// Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k]
310// Temporary buffer m_precalc[12*(k/4-1)]
311extern uint64_t bignum_emontredc_8n_cdiff (uint64_t k, uint64_t *z, const uint64_t *m,
312 uint64_t w, uint64_t *m_precalc);
245 313
246// Test bignums for equality, x = y 314// Test bignums for equality, x = y
247// Inputs x[m], y[n]; output function return 315// Inputs x[m], y[n]; output function return
248extern uint64_t bignum_eq (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); 316extern uint64_t bignum_eq (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y);
249 317
250// Test bignum for even-ness 318// Test bignum for even-ness
251// Input x[k]; output function return 319// Input x[k]; output function return
252extern uint64_t bignum_even (uint64_t k, uint64_t *x); 320extern uint64_t bignum_even (uint64_t k, const uint64_t *x);
253 321
254// Convert 4-digit (256-bit) bignum from big-endian bytes 322// Convert 4-digit (256-bit) bignum from big-endian bytes
255// Input x[32] (bytes); output z[4] 323// Input x[32] (bytes); output z[4]
256extern void bignum_frombebytes_4 (uint64_t z[static 4], uint8_t x[static 32]); 324extern void bignum_frombebytes_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint8_t x[S2N_BIGNUM_STATIC 32]);
257 325
258// Convert 6-digit (384-bit) bignum from big-endian bytes 326// Convert 6-digit (384-bit) bignum from big-endian bytes
259// Input x[48] (bytes); output z[6] 327// Input x[48] (bytes); output z[6]
260extern void bignum_frombebytes_6 (uint64_t z[static 6], uint8_t x[static 48]); 328extern void bignum_frombebytes_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint8_t x[S2N_BIGNUM_STATIC 48]);
261 329
262// Convert 4-digit (256-bit) bignum from little-endian bytes 330// Convert 4-digit (256-bit) bignum from little-endian bytes
263// Input x[32] (bytes); output z[4] 331// Input x[32] (bytes); output z[4]
264extern void bignum_fromlebytes_4 (uint64_t z[static 4], uint8_t x[static 32]); 332extern void bignum_fromlebytes_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint8_t x[S2N_BIGNUM_STATIC 32]);
265 333
266// Convert 6-digit (384-bit) bignum from little-endian bytes 334// Convert 6-digit (384-bit) bignum from little-endian bytes
267// Input x[48] (bytes); output z[6] 335// Input x[48] (bytes); output z[6]
268extern void bignum_fromlebytes_6 (uint64_t z[static 6], uint8_t x[static 48]); 336extern void bignum_fromlebytes_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint8_t x[S2N_BIGNUM_STATIC 48]);
269 337
270// Convert little-endian bytes to 9-digit 528-bit bignum 338// Convert little-endian bytes to 9-digit 528-bit bignum
271// Input x[66] (bytes); output z[9] 339// Input x[66] (bytes); output z[9]
272extern void bignum_fromlebytes_p521 (uint64_t z[static 9],uint8_t x[static 66]); 340extern void bignum_fromlebytes_p521 (uint64_t z[S2N_BIGNUM_STATIC 9],const uint8_t x[S2N_BIGNUM_STATIC 66]);
273 341
274// Compare bignums, x >= y 342// Compare bignums, x >= y
275// Inputs x[m], y[n]; output function return 343// Inputs x[m], y[n]; output function return
276extern uint64_t bignum_ge (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); 344extern uint64_t bignum_ge (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y);
277 345
278// Compare bignums, x > y 346// Compare bignums, x > y
279// Inputs x[m], y[n]; output function return 347// Inputs x[m], y[n]; output function return
280extern uint64_t bignum_gt (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); 348extern uint64_t bignum_gt (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y);
281 349
282// Halve modulo p_256, z := (x / 2) mod p_256, assuming x reduced 350// Halve modulo p_256, z := (x / 2) mod p_256, assuming x reduced
283// Input x[4]; output z[4] 351// Input x[4]; output z[4]
284extern void bignum_half_p256 (uint64_t z[static 4], uint64_t x[static 4]); 352extern void bignum_half_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
285 353
286// Halve modulo p_256k1, z := (x / 2) mod p_256k1, assuming x reduced 354// Halve modulo p_256k1, z := (x / 2) mod p_256k1, assuming x reduced
287// Input x[4]; output z[4] 355// Input x[4]; output z[4]
288extern void bignum_half_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); 356extern void bignum_half_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
289 357
290// Halve modulo p_384, z := (x / 2) mod p_384, assuming x reduced 358// Halve modulo p_384, z := (x / 2) mod p_384, assuming x reduced
291// Input x[6]; output z[6] 359// Input x[6]; output z[6]
292extern void bignum_half_p384 (uint64_t z[static 6], uint64_t x[static 6]); 360extern void bignum_half_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
293 361
294// Halve modulo p_521, z := (x / 2) mod p_521, assuming x reduced 362// Halve modulo p_521, z := (x / 2) mod p_521, assuming x reduced
295// Input x[9]; output z[9] 363// Input x[9]; output z[9]
296extern void bignum_half_p521 (uint64_t z[static 9], uint64_t x[static 9]); 364extern void bignum_half_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
365
366// Halve modulo p_sm2, z := (x / 2) mod p_sm2, assuming x reduced
367// Input x[4]; output z[4]
368extern void bignum_half_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
369
370// Modular inverse modulo p_25519 = 2^255 - 19
371// Input x[4]; output z[4]
372extern void bignum_inv_p25519(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]);
373
374// Modular inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1
375// Input x[4]; output z[4]
376extern void bignum_inv_p256(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]);
377
378// Modular inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1
379// Input x[6]; output z[6]
380extern void bignum_inv_p384(uint64_t z[S2N_BIGNUM_STATIC 6],const uint64_t x[S2N_BIGNUM_STATIC 6]);
381
382// Modular inverse modulo p_521 = 2^521 - 1
383// Input x[9]; output z[9]
384extern void bignum_inv_p521(uint64_t z[S2N_BIGNUM_STATIC 9],const uint64_t x[S2N_BIGNUM_STATIC 9]);
385
386// Modular inverse modulo p_sm2 = 2^256 - 2^224 - 2^96 + 2^64 - 1
387// Input x[4]; output z[4]
388extern void bignum_inv_sm2(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]);
389
390// Inverse square root modulo p_25519
391// Input x[4]; output function return (Legendre symbol) and z[4]
392extern int64_t bignum_invsqrt_p25519(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]);
393extern int64_t bignum_invsqrt_p25519_alt(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]);
297 394
298// Test bignum for zero-ness, x = 0 395// Test bignum for zero-ness, x = 0
299// Input x[k]; output function return 396// Input x[k]; output function return
300extern uint64_t bignum_iszero (uint64_t k, uint64_t *x); 397extern uint64_t bignum_iszero (uint64_t k, const uint64_t *x);
301 398
302// Multiply z := x * y 399// Multiply z := x * y
303// Inputs x[16], y[16]; output z[32]; temporary buffer t[>=32] 400// Inputs x[16], y[16]; output z[32]; temporary buffer t[>=32]
304extern void bignum_kmul_16_32 (uint64_t z[static 32], uint64_t x[static 16], uint64_t y[static 16], uint64_t t[static 32]); 401extern void bignum_kmul_16_32 (uint64_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 16], const uint64_t y[S2N_BIGNUM_STATIC 16], uint64_t t[S2N_BIGNUM_STATIC 32]);
305 402
306// Multiply z := x * y 403// Multiply z := x * y
307// Inputs x[32], y[32]; output z[64]; temporary buffer t[>=96] 404// Inputs x[32], y[32]; output z[64]; temporary buffer t[>=96]
308extern void bignum_kmul_32_64 (uint64_t z[static 64], uint64_t x[static 32], uint64_t y[static 32], uint64_t t[static 96]); 405extern void bignum_kmul_32_64 (uint64_t z[S2N_BIGNUM_STATIC 64], const uint64_t x[S2N_BIGNUM_STATIC 32], const uint64_t y[S2N_BIGNUM_STATIC 32], uint64_t t[S2N_BIGNUM_STATIC 96]);
309 406
310// Square, z := x^2 407// Square, z := x^2
311// Input x[16]; output z[32]; temporary buffer t[>=24] 408// Input x[16]; output z[32]; temporary buffer t[>=24]
312extern void bignum_ksqr_16_32 (uint64_t z[static 32], uint64_t x[static 16], uint64_t t[static 24]); 409extern void bignum_ksqr_16_32 (uint64_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 16], uint64_t t[S2N_BIGNUM_STATIC 24]);
313 410
314// Square, z := x^2 411// Square, z := x^2
315// Input x[32]; output z[64]; temporary buffer t[>=72] 412// Input x[32]; output z[64]; temporary buffer t[>=72]
316extern void bignum_ksqr_32_64 (uint64_t z[static 64], uint64_t x[static 32], uint64_t t[static 72]); 413extern void bignum_ksqr_32_64 (uint64_t z[S2N_BIGNUM_STATIC 64], const uint64_t x[S2N_BIGNUM_STATIC 32], uint64_t t[S2N_BIGNUM_STATIC 72]);
317 414
318// Compare bignums, x <= y 415// Compare bignums, x <= y
319// Inputs x[m], y[n]; output function return 416// Inputs x[m], y[n]; output function return
320extern uint64_t bignum_le (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); 417extern uint64_t bignum_le (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y);
321 418
322// Convert 4-digit (256-bit) bignum to/from little-endian form 419// Convert 4-digit (256-bit) bignum to/from little-endian form
323// Input x[4]; output z[4] 420// Input x[4]; output z[4]
324extern void bignum_littleendian_4 (uint64_t z[static 4], uint64_t x[static 4]); 421extern void bignum_littleendian_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
325 422
326// Convert 6-digit (384-bit) bignum to/from little-endian form 423// Convert 6-digit (384-bit) bignum to/from little-endian form
327// Input x[6]; output z[6] 424// Input x[6]; output z[6]
328extern void bignum_littleendian_6 (uint64_t z[static 6], uint64_t x[static 6]); 425extern void bignum_littleendian_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
329 426
330// Compare bignums, x < y 427// Compare bignums, x < y
331// Inputs x[m], y[n]; output function return 428// Inputs x[m], y[n]; output function return
332extern uint64_t bignum_lt (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); 429extern uint64_t bignum_lt (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y);
333 430
334// Multiply-add, z := z + x * y 431// Multiply-add, z := z + x * y
335// Inputs x[m], y[n]; outputs function return (carry-out) and z[k] 432// Inputs x[m], y[n]; outputs function return (carry-out) and z[k]
336extern uint64_t bignum_madd (uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); 433extern uint64_t bignum_madd (uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y);
434
435// Multiply-add modulo the order of the curve25519/edwards25519 basepoint
436// Inputs x[4], y[4], c[4]; output z[4]
437extern void bignum_madd_n25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4], const uint64_t c[S2N_BIGNUM_STATIC 4]);
438extern void bignum_madd_n25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4], const uint64_t c[S2N_BIGNUM_STATIC 4]);
439
440// Reduce modulo group order, z := x mod m_25519
441// Input x[4]; output z[4]
442extern void bignum_mod_m25519_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
443
444// Reduce modulo basepoint order, z := x mod n_25519
445// Input x[k]; output z[4]
446extern void bignum_mod_n25519 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x);
447
448// Reduce modulo basepoint order, z := x mod n_25519
449// Input x[4]; output z[4]
450extern void bignum_mod_n25519_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
337 451
338// Reduce modulo group order, z := x mod n_256 452// Reduce modulo group order, z := x mod n_256
339// Input x[k]; output z[4] 453// Input x[k]; output z[4]
340extern void bignum_mod_n256 (uint64_t z[static 4], uint64_t k, uint64_t *x); 454extern void bignum_mod_n256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x);
341extern void bignum_mod_n256_alt (uint64_t z[static 4], uint64_t k, uint64_t *x); 455extern void bignum_mod_n256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x);
342 456
343// Reduce modulo group order, z := x mod n_256 457// Reduce modulo group order, z := x mod n_256
344// Input x[4]; output z[4] 458// Input x[4]; output z[4]
345extern void bignum_mod_n256_4 (uint64_t z[static 4], uint64_t x[static 4]); 459extern void bignum_mod_n256_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
346 460
347// Reduce modulo group order, z := x mod n_256k1 461// Reduce modulo group order, z := x mod n_256k1
348// Input x[4]; output z[4] 462// Input x[4]; output z[4]
349extern void bignum_mod_n256k1_4 (uint64_t z[static 4], uint64_t x[static 4]); 463extern void bignum_mod_n256k1_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
350 464
351// Reduce modulo group order, z := x mod n_384 465// Reduce modulo group order, z := x mod n_384
352// Input x[k]; output z[6] 466// Input x[k]; output z[6]
353extern void bignum_mod_n384 (uint64_t z[static 6], uint64_t k, uint64_t *x); 467extern void bignum_mod_n384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x);
354extern void bignum_mod_n384_alt (uint64_t z[static 6], uint64_t k, uint64_t *x); 468extern void bignum_mod_n384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x);
355 469
356// Reduce modulo group order, z := x mod n_384 470// Reduce modulo group order, z := x mod n_384
357// Input x[6]; output z[6] 471// Input x[6]; output z[6]
358extern void bignum_mod_n384_6 (uint64_t z[static 6], uint64_t x[static 6]); 472extern void bignum_mod_n384_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
359 473
360// Reduce modulo group order, z := x mod n_521 474// Reduce modulo group order, z := x mod n_521
361// Input x[9]; output z[9] 475// Input x[9]; output z[9]
362extern void bignum_mod_n521_9 (uint64_t z[static 9], uint64_t x[static 9]); 476extern void bignum_mod_n521_9 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
363extern void bignum_mod_n521_9_alt (uint64_t z[static 9], uint64_t x[static 9]); 477extern void bignum_mod_n521_9_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
478
479// Reduce modulo group order, z := x mod n_sm2
480// Input x[k]; output z[4]
481extern void bignum_mod_nsm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x);
482extern void bignum_mod_nsm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x);
483
484// Reduce modulo group order, z := x mod n_sm2
485// Input x[4]; output z[4]
486extern void bignum_mod_nsm2_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
364 487
365// Reduce modulo field characteristic, z := x mod p_25519 488// Reduce modulo field characteristic, z := x mod p_25519
366// Input x[4]; output z[4] 489// Input x[4]; output z[4]
367extern void bignum_mod_p25519_4 (uint64_t z[static 4], uint64_t x[static 4]); 490extern void bignum_mod_p25519_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
368 491
369// Reduce modulo field characteristic, z := x mod p_256 492// Reduce modulo field characteristic, z := x mod p_256
370// Input x[k]; output z[4] 493// Input x[k]; output z[4]
371extern void bignum_mod_p256 (uint64_t z[static 4], uint64_t k, uint64_t *x); 494extern void bignum_mod_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x);
372extern void bignum_mod_p256_alt (uint64_t z[static 4], uint64_t k, uint64_t *x); 495extern void bignum_mod_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x);
373 496
374// Reduce modulo field characteristic, z := x mod p_256 497// Reduce modulo field characteristic, z := x mod p_256
375// Input x[4]; output z[4] 498// Input x[4]; output z[4]
376extern void bignum_mod_p256_4 (uint64_t z[static 4], uint64_t x[static 4]); 499extern void bignum_mod_p256_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
377 500
378// Reduce modulo field characteristic, z := x mod p_256k1 501// Reduce modulo field characteristic, z := x mod p_256k1
379// Input x[4]; output z[4] 502// Input x[4]; output z[4]
380extern void bignum_mod_p256k1_4 (uint64_t z[static 4], uint64_t x[static 4]); 503extern void bignum_mod_p256k1_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
381 504
382// Reduce modulo field characteristic, z := x mod p_384 505// Reduce modulo field characteristic, z := x mod p_384
383// Input x[k]; output z[6] 506// Input x[k]; output z[6]
384extern void bignum_mod_p384 (uint64_t z[static 6], uint64_t k, uint64_t *x); 507extern void bignum_mod_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x);
385extern void bignum_mod_p384_alt (uint64_t z[static 6], uint64_t k, uint64_t *x); 508extern void bignum_mod_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x);
386 509
387// Reduce modulo field characteristic, z := x mod p_384 510// Reduce modulo field characteristic, z := x mod p_384
388// Input x[6]; output z[6] 511// Input x[6]; output z[6]
389extern void bignum_mod_p384_6 (uint64_t z[static 6], uint64_t x[static 6]); 512extern void bignum_mod_p384_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
390 513
391// Reduce modulo field characteristic, z := x mod p_521 514// Reduce modulo field characteristic, z := x mod p_521
392// Input x[9]; output z[9] 515// Input x[9]; output z[9]
393extern void bignum_mod_p521_9 (uint64_t z[static 9], uint64_t x[static 9]); 516extern void bignum_mod_p521_9 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
517
518// Reduce modulo field characteristic, z := x mod p_sm2
519// Input x[k]; output z[4]
520extern void bignum_mod_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x);
521
522// Reduce modulo field characteristic, z := x mod p_sm2
523// Input x[4]; output z[4]
524extern void bignum_mod_sm2_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
394 525
395// Add modulo m, z := (x + y) mod m, assuming x and y reduced 526// Add modulo m, z := (x + y) mod m, assuming x and y reduced
396// Inputs x[k], y[k], m[k]; output z[k] 527// Inputs x[k], y[k], m[k]; output z[k]
397extern void bignum_modadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); 528extern void bignum_modadd (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m);
398 529
399// Double modulo m, z := (2 * x) mod m, assuming x reduced 530// Double modulo m, z := (2 * x) mod m, assuming x reduced
400// Inputs x[k], m[k]; output z[k] 531// Inputs x[k], m[k]; output z[k]
401extern void bignum_moddouble (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); 532extern void bignum_moddouble (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m);
533
534// Modular exponentiation for arbitrary odd modulus, z := (a^p) mod m
535// Inputs a[k], p[k], m[k]; output z[k], temporary buffer t[>=3*k]
536extern void bignum_modexp(uint64_t k,uint64_t *z, const uint64_t *a,const uint64_t *p,const uint64_t *m,uint64_t *t);
402 537
403// Compute "modification" constant z := 2^{64k} mod m 538// Compute "modification" constant z := 2^{64k} mod m
404// Input m[k]; output z[k]; temporary buffer t[>=k] 539// Input m[k]; output z[k]; temporary buffer t[>=k]
405extern void bignum_modifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t); 540extern void bignum_modifier (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t *t);
406 541
407// Invert modulo b, z = (1/a) mod b, assuming b is an odd number > 1, a coprime to b 542// Invert modulo b, z = (1/a) mod b, assuming b is an odd number > 1, a coprime to b
408// Inputs a[k], b[k]; output z[k]; temporary buffer t[>=3*k] 543// Inputs a[k], b[k]; output z[k]; temporary buffer t[>=3*k]
409extern void bignum_modinv (uint64_t k, uint64_t *z, uint64_t *a, uint64_t *b, uint64_t *t); 544extern void bignum_modinv (uint64_t k, uint64_t *z, const uint64_t *a, const uint64_t *b, uint64_t *t);
410 545
411// Optionally negate modulo m, z := (-x) mod m (if p nonzero) or z := x (if p zero), assuming x reduced 546// Optionally negate modulo m, z := (-x) mod m (if p nonzero) or z := x (if p zero), assuming x reduced
412// Inputs p, x[k], m[k]; output z[k] 547// Inputs p, x[k], m[k]; output z[k]
413extern void bignum_modoptneg (uint64_t k, uint64_t *z, uint64_t p, uint64_t *x, uint64_t *m); 548extern void bignum_modoptneg (uint64_t k, uint64_t *z, uint64_t p, const uint64_t *x, const uint64_t *m);
414 549
415// Subtract modulo m, z := (x - y) mod m, assuming x and y reduced 550// Subtract modulo m, z := (x - y) mod m, assuming x and y reduced
416// Inputs x[k], y[k], m[k]; output z[k] 551// Inputs x[k], y[k], m[k]; output z[k]
417extern void bignum_modsub (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); 552extern void bignum_modsub (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m);
418 553
419// Compute "montification" constant z := 2^{128k} mod m 554// Compute "montification" constant z := 2^{128k} mod m
420// Input m[k]; output z[k]; temporary buffer t[>=k] 555// Input m[k]; output z[k]; temporary buffer t[>=k]
421extern void bignum_montifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t); 556extern void bignum_montifier (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t *t);
557
558// Montgomery inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1
559// Input x[4]; output z[4]
560extern void bignum_montinv_p256(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]);
561
562// Montgomery inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1
563// Input x[6]; output z[6]
564extern void bignum_montinv_p384(uint64_t z[S2N_BIGNUM_STATIC 6],const uint64_t x[S2N_BIGNUM_STATIC 6]);
565
566// Montgomery inverse modulo p_sm2 = 2^256 - 2^224 - 2^96 + 2^64 - 1
567// Input x[4]; output z[4]
568extern void bignum_montinv_sm2(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]);
422 569
423// Montgomery multiply, z := (x * y / 2^{64k}) mod m 570// Montgomery multiply, z := (x * y / 2^{64k}) mod m
424// Inputs x[k], y[k], m[k]; output z[k] 571// Inputs x[k], y[k], m[k]; output z[k]
425extern void bignum_montmul (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); 572extern void bignum_montmul (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m);
426 573
427// Montgomery multiply, z := (x * y / 2^256) mod p_256 574// Montgomery multiply, z := (x * y / 2^256) mod p_256
428// Inputs x[4], y[4]; output z[4] 575// Inputs x[4], y[4]; output z[4]
429extern void bignum_montmul_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 576extern void bignum_montmul_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
430extern void bignum_montmul_p256_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 577extern void bignum_montmul_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
431 578
432// Montgomery multiply, z := (x * y / 2^256) mod p_256k1 579// Montgomery multiply, z := (x * y / 2^256) mod p_256k1
433// Inputs x[4], y[4]; output z[4] 580// Inputs x[4], y[4]; output z[4]
434extern void bignum_montmul_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 581extern void bignum_montmul_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
435extern void bignum_montmul_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 582extern void bignum_montmul_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
436 583
437// Montgomery multiply, z := (x * y / 2^384) mod p_384 584// Montgomery multiply, z := (x * y / 2^384) mod p_384
438// Inputs x[6], y[6]; output z[6] 585// Inputs x[6], y[6]; output z[6]
439extern void bignum_montmul_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); 586extern void bignum_montmul_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]);
440extern void bignum_montmul_p384_alt (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); 587extern void bignum_montmul_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]);
441 588
442// Montgomery multiply, z := (x * y / 2^576) mod p_521 589// Montgomery multiply, z := (x * y / 2^576) mod p_521
443// Inputs x[9], y[9]; output z[9] 590// Inputs x[9], y[9]; output z[9]
444extern void bignum_montmul_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); 591extern void bignum_montmul_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]);
445extern void bignum_montmul_p521_alt (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); 592extern void bignum_montmul_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]);
593
594// Montgomery multiply, z := (x * y / 2^256) mod p_sm2
595// Inputs x[4], y[4]; output z[4]
596extern void bignum_montmul_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
597extern void bignum_montmul_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
446 598
447// Montgomery reduce, z := (x' / 2^{64p}) MOD m 599// Montgomery reduce, z := (x' / 2^{64p}) MOD m
448// Inputs x[n], m[k], p; output z[k] 600// Inputs x[n], m[k], p; output z[k]
449extern void bignum_montredc (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t *m, uint64_t p); 601extern void bignum_montredc (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, const uint64_t *m, uint64_t p);
450 602
451// Montgomery square, z := (x^2 / 2^{64k}) mod m 603// Montgomery square, z := (x^2 / 2^{64k}) mod m
452// Inputs x[k], m[k]; output z[k] 604// Inputs x[k], m[k]; output z[k]
453extern void bignum_montsqr (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); 605extern void bignum_montsqr (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m);
454 606
455// Montgomery square, z := (x^2 / 2^256) mod p_256 607// Montgomery square, z := (x^2 / 2^256) mod p_256
456// Input x[4]; output z[4] 608// Input x[4]; output z[4]
457extern void bignum_montsqr_p256 (uint64_t z[static 4], uint64_t x[static 4]); 609extern void bignum_montsqr_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
458extern void bignum_montsqr_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); 610extern void bignum_montsqr_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
459 611
460// Montgomery square, z := (x^2 / 2^256) mod p_256k1 612// Montgomery square, z := (x^2 / 2^256) mod p_256k1
461// Input x[4]; output z[4] 613// Input x[4]; output z[4]
462extern void bignum_montsqr_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); 614extern void bignum_montsqr_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
463extern void bignum_montsqr_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); 615extern void bignum_montsqr_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
464 616
465// Montgomery square, z := (x^2 / 2^384) mod p_384 617// Montgomery square, z := (x^2 / 2^384) mod p_384
466// Input x[6]; output z[6] 618// Input x[6]; output z[6]
467extern void bignum_montsqr_p384 (uint64_t z[static 6], uint64_t x[static 6]); 619extern void bignum_montsqr_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
468extern void bignum_montsqr_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); 620extern void bignum_montsqr_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
469 621
470// Montgomery square, z := (x^2 / 2^576) mod p_521 622// Montgomery square, z := (x^2 / 2^576) mod p_521
471// Input x[9]; output z[9] 623// Input x[9]; output z[9]
472extern void bignum_montsqr_p521 (uint64_t z[static 9], uint64_t x[static 9]); 624extern void bignum_montsqr_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
473extern void bignum_montsqr_p521_alt (uint64_t z[static 9], uint64_t x[static 9]); 625extern void bignum_montsqr_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
626
627// Montgomery square, z := (x^2 / 2^256) mod p_sm2
628// Input x[4]; output z[4]
629extern void bignum_montsqr_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
630extern void bignum_montsqr_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
474 631
475// Multiply z := x * y 632// Multiply z := x * y
476// Inputs x[m], y[n]; output z[k] 633// Inputs x[m], y[n]; output z[k]
477extern void bignum_mul (uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); 634extern void bignum_mul (uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y);
478 635
479// Multiply z := x * y 636// Multiply z := x * y
480// Inputs x[4], y[4]; output z[8] 637// Inputs x[4], y[4]; output z[8]
481extern void bignum_mul_4_8 (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); 638extern void bignum_mul_4_8 (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
482extern void bignum_mul_4_8_alt (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); 639extern void bignum_mul_4_8_alt (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
483 640
484// Multiply z := x * y 641// Multiply z := x * y
485// Inputs x[6], y[6]; output z[12] 642// Inputs x[6], y[6]; output z[12]
486extern void bignum_mul_6_12 (uint64_t z[static 12], uint64_t x[static 6], uint64_t y[static 6]); 643extern void bignum_mul_6_12 (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]);
487extern void bignum_mul_6_12_alt (uint64_t z[static 12], uint64_t x[static 6], uint64_t y[static 6]); 644extern void bignum_mul_6_12_alt (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]);
488 645
489// Multiply z := x * y 646// Multiply z := x * y
490// Inputs x[8], y[8]; output z[16] 647// Inputs x[8], y[8]; output z[16]
491extern void bignum_mul_8_16 (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); 648extern void bignum_mul_8_16 (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8], const uint64_t y[S2N_BIGNUM_STATIC 8]);
492extern void bignum_mul_8_16_alt (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); 649extern void bignum_mul_8_16_alt (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8], const uint64_t y[S2N_BIGNUM_STATIC 8]);
493 650
494// Multiply modulo p_25519, z := (x * y) mod p_25519 651// Multiply modulo p_25519, z := (x * y) mod p_25519
495// Inputs x[4], y[4]; output z[4] 652// Inputs x[4], y[4]; output z[4]
496extern void bignum_mul_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 653extern void bignum_mul_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
497extern void bignum_mul_p25519_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 654extern void bignum_mul_p25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
498 655
499// Multiply modulo p_256k1, z := (x * y) mod p_256k1 656// Multiply modulo p_256k1, z := (x * y) mod p_256k1
500// Inputs x[4], y[4]; output z[4] 657// Inputs x[4], y[4]; output z[4]
501extern void bignum_mul_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 658extern void bignum_mul_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
502extern void bignum_mul_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 659extern void bignum_mul_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
503 660
504// Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced 661// Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced
505// Inputs x[9], y[9]; output z[9] 662// Inputs x[9], y[9]; output z[9]
506extern void bignum_mul_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); 663extern void bignum_mul_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]);
507extern void bignum_mul_p521_alt (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); 664extern void bignum_mul_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]);
508 665
509// Multiply bignum by 10 and add word: z := 10 * z + d 666// Multiply bignum by 10 and add word: z := 10 * z + d
510// Inputs z[k], d; outputs function return (carry) and z[k] 667// Inputs z[k], d; outputs function return (carry) and z[k]
@@ -512,55 +669,59 @@ extern uint64_t bignum_muladd10 (uint64_t k, uint64_t *z, uint64_t d);
512 669
513// Multiplex/select z := x (if p nonzero) or z := y (if p zero) 670// Multiplex/select z := x (if p nonzero) or z := y (if p zero)
514// Inputs p, x[k], y[k]; output z[k] 671// Inputs p, x[k], y[k]; output z[k]
515extern void bignum_mux (uint64_t p, uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y); 672extern void bignum_mux (uint64_t p, uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y);
516 673
517// 256-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) 674// 256-bit multiplex/select z := x (if p nonzero) or z := y (if p zero)
518// Inputs p, x[4], y[4]; output z[4] 675// Inputs p, x[4], y[4]; output z[4]
519extern void bignum_mux_4 (uint64_t p, uint64_t z[static 4],uint64_t x[static 4], uint64_t y[static 4]); 676extern void bignum_mux_4 (uint64_t p, uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
520 677
521// 384-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) 678// 384-bit multiplex/select z := x (if p nonzero) or z := y (if p zero)
522// Inputs p, x[6], y[6]; output z[6] 679// Inputs p, x[6], y[6]; output z[6]
523extern void bignum_mux_6 (uint64_t p, uint64_t z[static 6],uint64_t x[static 6], uint64_t y[static 6]); 680extern void bignum_mux_6 (uint64_t p, uint64_t z[S2N_BIGNUM_STATIC 6],const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]);
524 681
525// Select element from 16-element table, z := xs[k*i] 682// Select element from 16-element table, z := xs[k*i]
526// Inputs xs[16*k], i; output z[k] 683// Inputs xs[16*k], i; output z[k]
527extern void bignum_mux16 (uint64_t k, uint64_t *z, uint64_t *xs, uint64_t i); 684extern void bignum_mux16 (uint64_t k, uint64_t *z, const uint64_t *xs, uint64_t i);
528 685
529// Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced 686// Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced
530// Input x[4]; output z[4] 687// Input x[4]; output z[4]
531extern void bignum_neg_p25519 (uint64_t z[static 4], uint64_t x[static 4]); 688extern void bignum_neg_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
532 689
533// Negate modulo p_256, z := (-x) mod p_256, assuming x reduced 690// Negate modulo p_256, z := (-x) mod p_256, assuming x reduced
534// Input x[4]; output z[4] 691// Input x[4]; output z[4]
535extern void bignum_neg_p256 (uint64_t z[static 4], uint64_t x[static 4]); 692extern void bignum_neg_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
536 693
537// Negate modulo p_256k1, z := (-x) mod p_256k1, assuming x reduced 694// Negate modulo p_256k1, z := (-x) mod p_256k1, assuming x reduced
538// Input x[4]; output z[4] 695// Input x[4]; output z[4]
539extern void bignum_neg_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); 696extern void bignum_neg_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
540 697
541// Negate modulo p_384, z := (-x) mod p_384, assuming x reduced 698// Negate modulo p_384, z := (-x) mod p_384, assuming x reduced
542// Input x[6]; output z[6] 699// Input x[6]; output z[6]
543extern void bignum_neg_p384 (uint64_t z[static 6], uint64_t x[static 6]); 700extern void bignum_neg_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
544 701
545// Negate modulo p_521, z := (-x) mod p_521, assuming x reduced 702// Negate modulo p_521, z := (-x) mod p_521, assuming x reduced
546// Input x[9]; output z[9] 703// Input x[9]; output z[9]
547extern void bignum_neg_p521 (uint64_t z[static 9], uint64_t x[static 9]); 704extern void bignum_neg_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
705
706// Negate modulo p_sm2, z := (-x) mod p_sm2, assuming x reduced
707// Input x[4]; output z[4]
708extern void bignum_neg_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
548 709
549// Negated modular inverse, z := (-1/x) mod 2^{64k} 710// Negated modular inverse, z := (-1/x) mod 2^{64k}
550// Input x[k]; output z[k] 711// Input x[k]; output z[k]
551extern void bignum_negmodinv (uint64_t k, uint64_t *z, uint64_t *x); 712extern void bignum_negmodinv (uint64_t k, uint64_t *z, const uint64_t *x);
552 713
553// Test bignum for nonzero-ness x =/= 0 714// Test bignum for nonzero-ness x =/= 0
554// Input x[k]; output function return 715// Input x[k]; output function return
555extern uint64_t bignum_nonzero (uint64_t k, uint64_t *x); 716extern uint64_t bignum_nonzero (uint64_t k, const uint64_t *x);
556 717
557// Test 256-bit bignum for nonzero-ness x =/= 0 718// Test 256-bit bignum for nonzero-ness x =/= 0
558// Input x[4]; output function return 719// Input x[4]; output function return
559extern uint64_t bignum_nonzero_4(uint64_t x[static 4]); 720extern uint64_t bignum_nonzero_4(const uint64_t x[S2N_BIGNUM_STATIC 4]);
560 721
561// Test 384-bit bignum for nonzero-ness x =/= 0 722// Test 384-bit bignum for nonzero-ness x =/= 0
562// Input x[6]; output function return 723// Input x[6]; output function return
563extern uint64_t bignum_nonzero_6(uint64_t x[static 6]); 724extern uint64_t bignum_nonzero_6(const uint64_t x[S2N_BIGNUM_STATIC 6]);
564 725
565// Normalize bignum in-place by shifting left till top bit is 1 726// Normalize bignum in-place by shifting left till top bit is 1
566// Input z[k]; outputs function return (bits shifted left) and z[k] 727// Input z[k]; outputs function return (bits shifted left) and z[k]
@@ -568,7 +729,7 @@ extern uint64_t bignum_normalize (uint64_t k, uint64_t *z);
568 729
569// Test bignum for odd-ness 730// Test bignum for odd-ness
570// Input x[k]; output function return 731// Input x[k]; output function return
571extern uint64_t bignum_odd (uint64_t k, uint64_t *x); 732extern uint64_t bignum_odd (uint64_t k, const uint64_t *x);
572 733
573// Convert single digit to bignum, z := n 734// Convert single digit to bignum, z := n
574// Input n; output z[k] 735// Input n; output z[k]
@@ -576,39 +737,43 @@ extern void bignum_of_word (uint64_t k, uint64_t *z, uint64_t n);
576 737
577// Optionally add, z := x + y (if p nonzero) or z := x (if p zero) 738// Optionally add, z := x + y (if p nonzero) or z := x (if p zero)
578// Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] 739// Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k]
579extern uint64_t bignum_optadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y); 740extern uint64_t bignum_optadd (uint64_t k, uint64_t *z, const uint64_t *x, uint64_t p, const uint64_t *y);
580 741
581// Optionally negate, z := -x (if p nonzero) or z := x (if p zero) 742// Optionally negate, z := -x (if p nonzero) or z := x (if p zero)
582// Inputs p, x[k]; outputs function return (nonzero input) and z[k] 743// Inputs p, x[k]; outputs function return (nonzero input) and z[k]
583extern uint64_t bignum_optneg (uint64_t k, uint64_t *z, uint64_t p, uint64_t *x); 744extern uint64_t bignum_optneg (uint64_t k, uint64_t *z, uint64_t p, const uint64_t *x);
584 745
585// Optionally negate modulo p_25519, z := (-x) mod p_25519 (if p nonzero) or z := x (if p zero), assuming x reduced 746// Optionally negate modulo p_25519, z := (-x) mod p_25519 (if p nonzero) or z := x (if p zero), assuming x reduced
586// Inputs p, x[4]; output z[4] 747// Inputs p, x[4]; output z[4]
587extern void bignum_optneg_p25519 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]); 748extern void bignum_optneg_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]);
588 749
589// Optionally negate modulo p_256, z := (-x) mod p_256 (if p nonzero) or z := x (if p zero), assuming x reduced 750// Optionally negate modulo p_256, z := (-x) mod p_256 (if p nonzero) or z := x (if p zero), assuming x reduced
590// Inputs p, x[4]; output z[4] 751// Inputs p, x[4]; output z[4]
591extern void bignum_optneg_p256 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]); 752extern void bignum_optneg_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]);
592 753
593// Optionally negate modulo p_256k1, z := (-x) mod p_256k1 (if p nonzero) or z := x (if p zero), assuming x reduced 754// Optionally negate modulo p_256k1, z := (-x) mod p_256k1 (if p nonzero) or z := x (if p zero), assuming x reduced
594// Inputs p, x[4]; output z[4] 755// Inputs p, x[4]; output z[4]
595extern void bignum_optneg_p256k1 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]); 756extern void bignum_optneg_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]);
596 757
597// Optionally negate modulo p_384, z := (-x) mod p_384 (if p nonzero) or z := x (if p zero), assuming x reduced 758// Optionally negate modulo p_384, z := (-x) mod p_384 (if p nonzero) or z := x (if p zero), assuming x reduced
598// Inputs p, x[6]; output z[6] 759// Inputs p, x[6]; output z[6]
599extern void bignum_optneg_p384 (uint64_t z[static 6], uint64_t p, uint64_t x[static 6]); 760extern void bignum_optneg_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 6]);
600 761
601// Optionally negate modulo p_521, z := (-x) mod p_521 (if p nonzero) or z := x (if p zero), assuming x reduced 762// Optionally negate modulo p_521, z := (-x) mod p_521 (if p nonzero) or z := x (if p zero), assuming x reduced
602// Inputs p, x[9]; output z[9] 763// Inputs p, x[9]; output z[9]
603extern void bignum_optneg_p521 (uint64_t z[static 9], uint64_t p, uint64_t x[static 9]); 764extern void bignum_optneg_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 9]);
765
766// Optionally negate modulo p_sm2, z := (-x) mod p_sm2 (if p nonzero) or z := x (if p zero), assuming x reduced
767// Inputs p, x[4]; output z[4]
768extern void bignum_optneg_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]);
604 769
605// Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero) 770// Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero)
606// Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] 771// Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k]
607extern uint64_t bignum_optsub (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y); 772extern uint64_t bignum_optsub (uint64_t k, uint64_t *z, const uint64_t *x, uint64_t p, const uint64_t *y);
608 773
609// Optionally subtract or add, z := x + sgn(p) * y interpreting p as signed 774// Optionally subtract or add, z := x + sgn(p) * y interpreting p as signed
610// Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] 775// Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k]
611extern uint64_t bignum_optsubadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y); 776extern uint64_t bignum_optsubadd (uint64_t k, uint64_t *z, const uint64_t *x, uint64_t p, const uint64_t *y);
612 777
613// Return bignum of power of 2, z := 2^n 778// Return bignum of power of 2, z := 2^n
614// Input n; output z[k] 779// Input n; output z[k]
@@ -616,216 +781,376 @@ extern void bignum_pow2 (uint64_t k, uint64_t *z, uint64_t n);
616 781
617// Shift bignum left by c < 64 bits z := x * 2^c 782// Shift bignum left by c < 64 bits z := x * 2^c
618// Inputs x[n], c; outputs function return (carry-out) and z[k] 783// Inputs x[n], c; outputs function return (carry-out) and z[k]
619extern uint64_t bignum_shl_small (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t c); 784extern uint64_t bignum_shl_small (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t c);
620 785
621// Shift bignum right by c < 64 bits z := floor(x / 2^c) 786// Shift bignum right by c < 64 bits z := floor(x / 2^c)
622// Inputs x[n], c; outputs function return (bits shifted out) and z[k] 787// Inputs x[n], c; outputs function return (bits shifted out) and z[k]
623extern uint64_t bignum_shr_small (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t c); 788extern uint64_t bignum_shr_small (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t c);
624 789
625// Square, z := x^2 790// Square, z := x^2
626// Input x[n]; output z[k] 791// Input x[n]; output z[k]
627extern void bignum_sqr (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); 792extern void bignum_sqr (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x);
628 793
629// Square, z := x^2 794// Square, z := x^2
630// Input x[4]; output z[8] 795// Input x[4]; output z[8]
631extern void bignum_sqr_4_8 (uint64_t z[static 8], uint64_t x[static 4]); 796extern void bignum_sqr_4_8 (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4]);
632extern void bignum_sqr_4_8_alt (uint64_t z[static 8], uint64_t x[static 4]); 797extern void bignum_sqr_4_8_alt (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4]);
633 798
634// Square, z := x^2 799// Square, z := x^2
635// Input x[6]; output z[12] 800// Input x[6]; output z[12]
636extern void bignum_sqr_6_12 (uint64_t z[static 12], uint64_t x[static 6]); 801extern void bignum_sqr_6_12 (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6]);
637extern void bignum_sqr_6_12_alt (uint64_t z[static 12], uint64_t x[static 6]); 802extern void bignum_sqr_6_12_alt (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6]);
638 803
639// Square, z := x^2 804// Square, z := x^2
640// Input x[8]; output z[16] 805// Input x[8]; output z[16]
641extern void bignum_sqr_8_16 (uint64_t z[static 16], uint64_t x[static 8]); 806extern void bignum_sqr_8_16 (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8]);
642extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); 807extern void bignum_sqr_8_16_alt (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8]);
643 808
644// Square modulo p_25519, z := (x^2) mod p_25519 809// Square modulo p_25519, z := (x^2) mod p_25519
645// Input x[4]; output z[4] 810// Input x[4]; output z[4]
646extern void bignum_sqr_p25519 (uint64_t z[static 4], uint64_t x[static 4]); 811extern void bignum_sqr_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
647extern void bignum_sqr_p25519_alt (uint64_t z[static 4], uint64_t x[static 4]); 812extern void bignum_sqr_p25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
648 813
649// Square modulo p_256k1, z := (x^2) mod p_256k1 814// Square modulo p_256k1, z := (x^2) mod p_256k1
650// Input x[4]; output z[4] 815// Input x[4]; output z[4]
651extern void bignum_sqr_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); 816extern void bignum_sqr_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
652extern void bignum_sqr_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); 817extern void bignum_sqr_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
653 818
654// Square modulo p_521, z := (x^2) mod p_521, assuming x reduced 819// Square modulo p_521, z := (x^2) mod p_521, assuming x reduced
655// Input x[9]; output z[9] 820// Input x[9]; output z[9]
656extern void bignum_sqr_p521 (uint64_t z[static 9], uint64_t x[static 9]); 821extern void bignum_sqr_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
657extern void bignum_sqr_p521_alt (uint64_t z[static 9], uint64_t x[static 9]); 822extern void bignum_sqr_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
823
824// Square root modulo p_25519
825// Input x[4]; output function return (Legendre symbol) and z[4]
826extern int64_t bignum_sqrt_p25519(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]);
827extern int64_t bignum_sqrt_p25519_alt(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]);
658 828
659// Subtract, z := x - y 829// Subtract, z := x - y
660// Inputs x[m], y[n]; outputs function return (carry-out) and z[p] 830// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
661extern uint64_t bignum_sub (uint64_t p, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); 831extern uint64_t bignum_sub (uint64_t p, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y);
662 832
663// Subtract modulo p_25519, z := (x - y) mod p_25519, assuming x and y reduced 833// Subtract modulo p_25519, z := (x - y) mod p_25519, assuming x and y reduced
664// Inputs x[4], y[4]; output z[4] 834// Inputs x[4], y[4]; output z[4]
665extern void bignum_sub_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 835extern void bignum_sub_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
666 836
667// Subtract modulo p_256, z := (x - y) mod p_256, assuming x and y reduced 837// Subtract modulo p_256, z := (x - y) mod p_256, assuming x and y reduced
668// Inputs x[4], y[4]; output z[4] 838// Inputs x[4], y[4]; output z[4]
669extern void bignum_sub_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 839extern void bignum_sub_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
670 840
671// Subtract modulo p_256k1, z := (x - y) mod p_256k1, assuming x and y reduced 841// Subtract modulo p_256k1, z := (x - y) mod p_256k1, assuming x and y reduced
672// Inputs x[4], y[4]; output z[4] 842// Inputs x[4], y[4]; output z[4]
673extern void bignum_sub_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); 843extern void bignum_sub_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
674 844
675// Subtract modulo p_384, z := (x - y) mod p_384, assuming x and y reduced 845// Subtract modulo p_384, z := (x - y) mod p_384, assuming x and y reduced
676// Inputs x[6], y[6]; output z[6] 846// Inputs x[6], y[6]; output z[6]
677extern void bignum_sub_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); 847extern void bignum_sub_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]);
678 848
679// Subtract modulo p_521, z := (x - y) mod p_521, assuming x and y reduced 849// Subtract modulo p_521, z := (x - y) mod p_521, assuming x and y reduced
680// Inputs x[9], y[9]; output z[9] 850// Inputs x[9], y[9]; output z[9]
681extern void bignum_sub_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); 851extern void bignum_sub_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]);
852
853// Subtract modulo p_sm2, z := (x - y) mod p_sm2, assuming x and y reduced
854// Inputs x[4], y[4]; output z[4]
855extern void bignum_sub_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]);
682 856
683// Convert 4-digit (256-bit) bignum to big-endian bytes 857// Convert 4-digit (256-bit) bignum to big-endian bytes
684// Input x[4]; output z[32] (bytes) 858// Input x[4]; output z[32] (bytes)
685extern void bignum_tobebytes_4 (uint8_t z[static 32], uint64_t x[static 4]); 859extern void bignum_tobebytes_4 (uint8_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 4]);
686 860
687// Convert 6-digit (384-bit) bignum to big-endian bytes 861// Convert 6-digit (384-bit) bignum to big-endian bytes
688// Input x[6]; output z[48] (bytes) 862// Input x[6]; output z[48] (bytes)
689extern void bignum_tobebytes_6 (uint8_t z[static 48], uint64_t x[static 6]); 863extern void bignum_tobebytes_6 (uint8_t z[S2N_BIGNUM_STATIC 48], const uint64_t x[S2N_BIGNUM_STATIC 6]);
690 864
691// Convert 4-digit (256-bit) bignum to little-endian bytes 865// Convert 4-digit (256-bit) bignum to little-endian bytes
692// Input x[4]; output z[32] (bytes) 866// Input x[4]; output z[32] (bytes)
693extern void bignum_tolebytes_4 (uint8_t z[static 32], uint64_t x[static 4]); 867extern void bignum_tolebytes_4 (uint8_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 4]);
694 868
695// Convert 6-digit (384-bit) bignum to little-endian bytes 869// Convert 6-digit (384-bit) bignum to little-endian bytes
696// Input x[6]; output z[48] (bytes) 870// Input x[6]; output z[48] (bytes)
697extern void bignum_tolebytes_6 (uint8_t z[static 48], uint64_t x[static 6]); 871extern void bignum_tolebytes_6 (uint8_t z[S2N_BIGNUM_STATIC 48], const uint64_t x[S2N_BIGNUM_STATIC 6]);
698 872
699// Convert 9-digit 528-bit bignum to little-endian bytes 873// Convert 9-digit 528-bit bignum to little-endian bytes
700// Input x[9]; output z[66] (bytes) 874// Input x[9]; output z[66] (bytes)
701extern void bignum_tolebytes_p521 (uint8_t z[static 66], uint64_t x[static 9]); 875extern void bignum_tolebytes_p521 (uint8_t z[S2N_BIGNUM_STATIC 66], const uint64_t x[S2N_BIGNUM_STATIC 9]);
702 876
703// Convert to Montgomery form z := (2^256 * x) mod p_256 877// Convert to Montgomery form z := (2^256 * x) mod p_256
704// Input x[4]; output z[4] 878// Input x[4]; output z[4]
705extern void bignum_tomont_p256 (uint64_t z[static 4], uint64_t x[static 4]); 879extern void bignum_tomont_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
706extern void bignum_tomont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); 880extern void bignum_tomont_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
707 881
708// Convert to Montgomery form z := (2^256 * x) mod p_256k1 882// Convert to Montgomery form z := (2^256 * x) mod p_256k1
709// Input x[4]; output z[4] 883// Input x[4]; output z[4]
710extern void bignum_tomont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); 884extern void bignum_tomont_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
711extern void bignum_tomont_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); 885extern void bignum_tomont_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
712 886
713// Convert to Montgomery form z := (2^384 * x) mod p_384 887// Convert to Montgomery form z := (2^384 * x) mod p_384
714// Input x[6]; output z[6] 888// Input x[6]; output z[6]
715extern void bignum_tomont_p384 (uint64_t z[static 6], uint64_t x[static 6]); 889extern void bignum_tomont_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
716extern void bignum_tomont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); 890extern void bignum_tomont_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
717 891
718// Convert to Montgomery form z := (2^576 * x) mod p_521 892// Convert to Montgomery form z := (2^576 * x) mod p_521
719// Input x[9]; output z[9] 893// Input x[9]; output z[9]
720extern void bignum_tomont_p521 (uint64_t z[static 9], uint64_t x[static 9]); 894extern void bignum_tomont_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
895
896// Convert to Montgomery form z := (2^256 * x) mod p_sm2
897// Input x[4]; output z[4]
898extern void bignum_tomont_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
721 899
722// Triple modulo p_256, z := (3 * x) mod p_256 900// Triple modulo p_256, z := (3 * x) mod p_256
723// Input x[4]; output z[4] 901// Input x[4]; output z[4]
724extern void bignum_triple_p256 (uint64_t z[static 4], uint64_t x[static 4]); 902extern void bignum_triple_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
725extern void bignum_triple_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); 903extern void bignum_triple_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
726 904
727// Triple modulo p_256k1, z := (3 * x) mod p_256k1 905// Triple modulo p_256k1, z := (3 * x) mod p_256k1
728// Input x[4]; output z[4] 906// Input x[4]; output z[4]
729extern void bignum_triple_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); 907extern void bignum_triple_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
730extern void bignum_triple_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); 908extern void bignum_triple_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
731 909
732// Triple modulo p_384, z := (3 * x) mod p_384 910// Triple modulo p_384, z := (3 * x) mod p_384
733// Input x[6]; output z[6] 911// Input x[6]; output z[6]
734extern void bignum_triple_p384 (uint64_t z[static 6], uint64_t x[static 6]); 912extern void bignum_triple_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
735extern void bignum_triple_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); 913extern void bignum_triple_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
736 914
737// Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced 915// Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced
738// Input x[9]; output z[9] 916// Input x[9]; output z[9]
739extern void bignum_triple_p521 (uint64_t z[static 9], uint64_t x[static 9]); 917extern void bignum_triple_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
740extern void bignum_triple_p521_alt (uint64_t z[static 9], uint64_t x[static 9]); 918extern void bignum_triple_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
919
920// Triple modulo p_sm2, z := (3 * x) mod p_sm2
921// Input x[4]; output z[4]
922extern void bignum_triple_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
923extern void bignum_triple_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
741 924
742// Montgomery ladder step for curve25519 925// Montgomery ladder step for curve25519
743// Inputs point[8], pp[16], b; output rr[16] 926// Inputs point[8], pp[16], b; output rr[16]
744extern void curve25519_ladderstep(uint64_t rr[16],uint64_t point[8],uint64_t pp[16],uint64_t b); 927extern void curve25519_ladderstep(uint64_t rr[16],const uint64_t point[8],const uint64_t pp[16],uint64_t b);
745extern void curve25519_ladderstep_alt(uint64_t rr[16],uint64_t point[8],uint64_t pp[16],uint64_t b); 928extern void curve25519_ladderstep_alt(uint64_t rr[16],const uint64_t point[8],const uint64_t pp[16],uint64_t b);
746 929
747// Projective scalar multiplication, x coordinate only, for curve25519 930// Projective scalar multiplication, x coordinate only, for curve25519
748// Inputs scalar[4], point[4]; output res[8] 931// Inputs scalar[4], point[4]; output res[8]
749extern void curve25519_pxscalarmul(uint64_t res[static 8],uint64_t scalar[static 4],uint64_t point[static 4]); 932extern void curve25519_pxscalarmul(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]);
750extern void curve25519_pxscalarmul_alt(uint64_t res[static 8],uint64_t scalar[static 4],uint64_t point[static 4]); 933extern void curve25519_pxscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]);
751 934
752// x25519 function for curve25519 935// x25519 function for curve25519
753// Inputs scalar[4], point[4]; output res[4] 936// Inputs scalar[4], point[4]; output res[4]
754extern void curve25519_x25519(uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]); 937extern void curve25519_x25519(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]);
755extern void curve25519_x25519_alt(uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]); 938extern void curve25519_x25519_alt(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]);
939
940// x25519 function for curve25519 (byte array arguments)
941// Inputs scalar[32] (bytes), point[32] (bytes); output res[32] (bytes)
942extern void curve25519_x25519_byte(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32],const uint8_t point[S2N_BIGNUM_STATIC 32]);
943extern void curve25519_x25519_byte_alt(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32],const uint8_t point[S2N_BIGNUM_STATIC 32]);
756 944
757// x25519 function for curve25519 on base element 9 945// x25519 function for curve25519 on base element 9
758// Input scalar[4]; output res[4] 946// Input scalar[4]; output res[4]
759extern void curve25519_x25519base(uint64_t res[static 4],uint64_t scalar[static 4]); 947extern void curve25519_x25519base(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4]);
760extern void curve25519_x25519base_alt(uint64_t res[static 4],uint64_t scalar[static 4]); 948extern void curve25519_x25519base_alt(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4]);
949
950// x25519 function for curve25519 on base element 9 (byte array arguments)
951// Input scalar[32] (bytes); output res[32] (bytes)
952extern void curve25519_x25519base_byte(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32]);
953extern void curve25519_x25519base_byte_alt(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32]);
954
955// Decode compressed 256-bit form of edwards25519 point
956// Input c[32] (bytes); output function return and z[8]
957extern uint64_t edwards25519_decode(uint64_t z[S2N_BIGNUM_STATIC 8], const uint8_t c[S2N_BIGNUM_STATIC 32]);
958extern uint64_t edwards25519_decode_alt(uint64_t z[S2N_BIGNUM_STATIC 8], const uint8_t c[S2N_BIGNUM_STATIC 32]);
959
960// Encode edwards25519 point into compressed form as 256-bit number
961// Input p[8]; output z[32] (bytes)
962extern void edwards25519_encode(uint8_t z[S2N_BIGNUM_STATIC 32], const uint64_t p[S2N_BIGNUM_STATIC 8]);
761 963
762// Extended projective addition for edwards25519 964// Extended projective addition for edwards25519
763// Inputs p1[16], p2[16]; output p3[16] 965// Inputs p1[16], p2[16]; output p3[16]
764extern void edwards25519_epadd(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 16]); 966extern void edwards25519_epadd(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 16]);
765extern void edwards25519_epadd_alt(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 16]); 967extern void edwards25519_epadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 16]);
766 968
767// Extended projective doubling for edwards25519 969// Extended projective doubling for edwards25519
768// Inputs p1[12]; output p3[16] 970// Inputs p1[12]; output p3[16]
769extern void edwards25519_epdouble(uint64_t p3[static 16],uint64_t p1[static 12]); 971extern void edwards25519_epdouble(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 12]);
770extern void edwards25519_epdouble_alt(uint64_t p3[static 16],uint64_t p1[static 12]); 972extern void edwards25519_epdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 12]);
771 973
772// Projective doubling for edwards25519 974// Projective doubling for edwards25519
773// Inputs p1[12]; output p3[12] 975// Inputs p1[12]; output p3[12]
774extern void edwards25519_pdouble(uint64_t p3[static 12],uint64_t p1[static 12]); 976extern void edwards25519_pdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]);
775extern void edwards25519_pdouble_alt(uint64_t p3[static 12],uint64_t p1[static 12]); 977extern void edwards25519_pdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]);
776 978
777// Extended projective + precomputed mixed addition for edwards25519 979// Extended projective + precomputed mixed addition for edwards25519
778// Inputs p1[16], p2[12]; output p3[16] 980// Inputs p1[16], p2[12]; output p3[16]
779extern void edwards25519_pepadd(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 12]); 981extern void edwards25519_pepadd(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 12]);
780extern void edwards25519_pepadd_alt(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 12]); 982extern void edwards25519_pepadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 12]);
983
984// Scalar multiplication by standard basepoint for edwards25519 (Ed25519)
985// Input scalar[4]; output res[8]
986extern void edwards25519_scalarmulbase(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4]);
987extern void edwards25519_scalarmulbase_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4]);
988
989// Double scalar multiplication for edwards25519, fresh and base point
990// Inputs scalar[4], point[8], bscalar[4]; output res[8]
991extern void edwards25519_scalarmuldouble(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4], const uint64_t point[S2N_BIGNUM_STATIC 8],const uint64_t bscalar[S2N_BIGNUM_STATIC 4]);
992extern void edwards25519_scalarmuldouble_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4], const uint64_t point[S2N_BIGNUM_STATIC 8],const uint64_t bscalar[S2N_BIGNUM_STATIC 4]);
993
994// Scalar product of 2-element polynomial vectors in NTT domain, with mulcache
995// Inputs a[512], b[512], bt[256] (signed 16-bit words); output r[256] (signed 16-bit words)
996extern void mlkem_basemul_k2(int16_t r[S2N_BIGNUM_STATIC 256],const int16_t a[S2N_BIGNUM_STATIC 512],const int16_t b[S2N_BIGNUM_STATIC 512],const int16_t bt[S2N_BIGNUM_STATIC 256]);
997
998// Scalar product of 3-element polynomial vectors in NTT domain, with mulcache
999// Inputs a[768], b[768], bt[384] (signed 16-bit words); output r[256] (signed 16-bit words)
1000extern void mlkem_basemul_k3(int16_t r[S2N_BIGNUM_STATIC 256],const int16_t a[S2N_BIGNUM_STATIC 768],const int16_t b[S2N_BIGNUM_STATIC 768],const int16_t bt[S2N_BIGNUM_STATIC 384]);
1001
1002// Scalar product of 4-element polynomial vectors in NTT domain, with mulcache
1003// Inputs a[1024], b[1024], bt[512] (signed 16-bit words); output r[256] (signed 16-bit words)
1004extern void mlkem_basemul_k4(int16_t r[S2N_BIGNUM_STATIC 256],const int16_t a[S2N_BIGNUM_STATIC 1024],const int16_t b[S2N_BIGNUM_STATIC 1024],const int16_t bt[S2N_BIGNUM_STATIC 512]);
1005
1006// Inverse number-theoretic transform from ML-KEM
1007// Inputs a[256] (signed 16-bit words), z_01234[80] (signed 16-bit words), z_56[384] (signed 16-bit words); output a[256] (signed 16-bit words)
1008extern void mlkem_intt(int16_t a[S2N_BIGNUM_STATIC 256],const int16_t z_01234[S2N_BIGNUM_STATIC 80],const int16_t z_56[S2N_BIGNUM_STATIC 384]);
1009
1010// Precompute the mulcache data for a polynomial in the NTT domain
1011// Inputs a[256], z[128] and t[128] (signed 16-bit words); output x[128] (signed 16-bit words)
1012extern void mlkem_mulcache_compute(int16_t x[S2N_BIGNUM_STATIC 128],const int16_t a[S2N_BIGNUM_STATIC 256],const int16_t z[S2N_BIGNUM_STATIC 128],const int16_t t[S2N_BIGNUM_STATIC 128]);
1013
1014// Forward number-theoretic transform from ML-KEM
1015// Inputs a[256] (signed 16-bit words), z_01234[80] (signed 16-bit words), z_56[384] (signed 16-bit words); output a[256] (signed 16-bit words)
1016extern void mlkem_ntt(int16_t a[S2N_BIGNUM_STATIC 256],const int16_t z_01234[S2N_BIGNUM_STATIC 80],const int16_t z_56[S2N_BIGNUM_STATIC 384]);
1017
1018// Canonical modular reduction of polynomial coefficients for ML-KEM
1019// Input a[256] (signed 16-bit words); output a[256] (signed 16-bit words)
1020extern void mlkem_reduce(int16_t a[S2N_BIGNUM_STATIC 256]);
1021
1022// Pack ML-KEM polynomial coefficients as 12-bit numbers
1023// Input a[256] (signed 16-bit words); output r[384] (bytes)
1024extern void mlkem_tobytes(uint8_t r[S2N_BIGNUM_STATIC 384],const int16_t a[S2N_BIGNUM_STATIC 256]);
1025
1026// Conversion of ML-KEM polynomial coefficients to Montgomery form
1027// Input a[256] (signed 16-bit words); output a[256] (signed 16-bit words)
1028extern void mlkem_tomont(int16_t a[S2N_BIGNUM_STATIC 256]);
1029
1030// Uniform rejection sampling for ML-KEM
1031// Inputs *buf (unsigned bytes), buflen, table (unsigned bytes); output r[256] (signed 16-bit words) and function return
1032extern uint64_t mlkem_rej_uniform_VARIABLE_TIME(int16_t r[S2N_BIGNUM_STATIC 256],const uint8_t *buf,uint64_t buflen,const uint8_t *table);
781 1033
782// Point addition on NIST curve P-256 in Montgomery-Jacobian coordinates 1034// Point addition on NIST curve P-256 in Montgomery-Jacobian coordinates
783// Inputs p1[12], p2[12]; output p3[12] 1035// Inputs p1[12], p2[12]; output p3[12]
784extern void p256_montjadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 12]); 1036extern void p256_montjadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]);
1037extern void p256_montjadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]);
785 1038
786// Point doubling on NIST curve P-256 in Montgomery-Jacobian coordinates 1039// Point doubling on NIST curve P-256 in Montgomery-Jacobian coordinates
787// Inputs p1[12]; output p3[12] 1040// Inputs p1[12]; output p3[12]
788extern void p256_montjdouble(uint64_t p3[static 12],uint64_t p1[static 12]); 1041extern void p256_montjdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]);
1042extern void p256_montjdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]);
789 1043
790// Point mixed addition on NIST curve P-256 in Montgomery-Jacobian coordinates 1044// Point mixed addition on NIST curve P-256 in Montgomery-Jacobian coordinates
791// Inputs p1[12], p2[8]; output p3[12] 1045// Inputs p1[12], p2[8]; output p3[12]
792extern void p256_montjmixadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 8]); 1046extern void p256_montjmixadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]);
1047extern void p256_montjmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]);
1048
1049// Montgomery-Jacobian form scalar multiplication for P-256
1050// Input scalar[4], point[12]; output res[12]
1051extern void p256_montjscalarmul(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]);
1052extern void p256_montjscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]);
1053
1054// Scalar multiplication for NIST curve P-256
1055// Input scalar[4], point[8]; output res[8]
1056extern void p256_scalarmul(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 8]);
1057extern void p256_scalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 8]);
1058
1059// Scalar multiplication for precomputed point on NIST curve P-256
1060// Input scalar[4], blocksize, table[]; output res[8]
1061extern void p256_scalarmulbase(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],uint64_t blocksize,const uint64_t *table);
1062extern void p256_scalarmulbase_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],uint64_t blocksize,const uint64_t *table);
793 1063
794// Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates 1064// Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates
795// Inputs p1[18], p2[18]; output p3[18] 1065// Inputs p1[18], p2[18]; output p3[18]
796extern void p384_montjadd(uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 18]); 1066extern void p384_montjadd(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 18]);
1067extern void p384_montjadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 18]);
797 1068
798// Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates 1069// Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates
799// Inputs p1[18]; output p3[18] 1070// Inputs p1[18]; output p3[18]
800extern void p384_montjdouble(uint64_t p3[static 18],uint64_t p1[static 18]); 1071extern void p384_montjdouble(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18]);
1072extern void p384_montjdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18]);
801 1073
802// Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates 1074// Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates
803// Inputs p1[18], p2[12]; output p3[18] 1075// Inputs p1[18], p2[12]; output p3[18]
804extern void p384_montjmixadd(uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 12]); 1076extern void p384_montjmixadd(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 12]);
1077extern void p384_montjmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 12]);
1078
1079// Montgomery-Jacobian form scalar multiplication for P-384
1080// Input scalar[6], point[18]; output res[18]
1081extern void p384_montjscalarmul(uint64_t res[S2N_BIGNUM_STATIC 18],const uint64_t scalar[S2N_BIGNUM_STATIC 6],const uint64_t point[S2N_BIGNUM_STATIC 18]);
1082extern void p384_montjscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 18],const uint64_t scalar[S2N_BIGNUM_STATIC 6],const uint64_t point[S2N_BIGNUM_STATIC 18]);
805 1083
806// Point addition on NIST curve P-521 in Jacobian coordinates 1084// Point addition on NIST curve P-521 in Jacobian coordinates
807// Inputs p1[27], p2[27]; output p3[27] 1085// Inputs p1[27], p2[27]; output p3[27]
808extern void p521_jadd(uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 27]); 1086extern void p521_jadd(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 27]);
1087extern void p521_jadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 27]);
809 1088
810// Point doubling on NIST curve P-521 in Jacobian coordinates 1089// Point doubling on NIST curve P-521 in Jacobian coordinates
811// Input p1[27]; output p3[27] 1090// Input p1[27]; output p3[27]
812extern void p521_jdouble(uint64_t p3[static 27],uint64_t p1[static 27]); 1091extern void p521_jdouble(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27]);
1092extern void p521_jdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27]);
813 1093
814// Point mixed addition on NIST curve P-521 in Jacobian coordinates 1094// Point mixed addition on NIST curve P-521 in Jacobian coordinates
815// Inputs p1[27], p2[18]; output p3[27] 1095// Inputs p1[27], p2[18]; output p3[27]
816extern void p521_jmixadd(uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 18]); 1096extern void p521_jmixadd(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 18]);
1097extern void p521_jmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 18]);
1098
1099// Jacobian form scalar multiplication for P-521
1100// Input scalar[9], point[27]; output res[27]
1101extern void p521_jscalarmul(uint64_t res[S2N_BIGNUM_STATIC 27],const uint64_t scalar[S2N_BIGNUM_STATIC 9],const uint64_t point[S2N_BIGNUM_STATIC 27]);
1102extern void p521_jscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 27],const uint64_t scalar[S2N_BIGNUM_STATIC 9],const uint64_t point[S2N_BIGNUM_STATIC 27]);
817 1103
818// Point addition on SECG curve secp256k1 in Jacobian coordinates 1104// Point addition on SECG curve secp256k1 in Jacobian coordinates
819// Inputs p1[12], p2[12]; output p3[12] 1105// Inputs p1[12], p2[12]; output p3[12]
820extern void secp256k1_jadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 12]); 1106extern void secp256k1_jadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]);
1107extern void secp256k1_jadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]);
821 1108
822// Point doubling on SECG curve secp256k1 in Jacobian coordinates 1109// Point doubling on SECG curve secp256k1 in Jacobian coordinates
823// Input p1[12]; output p3[12] 1110// Input p1[12]; output p3[12]
824extern void secp256k1_jdouble(uint64_t p3[static 12],uint64_t p1[static 12]); 1111extern void secp256k1_jdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]);
1112extern void secp256k1_jdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]);
825 1113
826// Point mixed addition on SECG curve secp256k1 in Jacobian coordinates 1114// Point mixed addition on SECG curve secp256k1 in Jacobian coordinates
827// Inputs p1[12], p2[8]; output p3[12] 1115// Inputs p1[12], p2[8]; output p3[12]
828extern void secp256k1_jmixadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 8]); 1116extern void secp256k1_jmixadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]);
1117extern void secp256k1_jmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]);
1118
1119// Keccak-f1600 permutation for SHA3
1120// Inputs a[25], rc[24]; output a[25]
1121extern void sha3_keccak_f1600(uint64_t a[S2N_BIGNUM_STATIC 25],const uint64_t rc[S2N_BIGNUM_STATIC 24]);
1122extern void sha3_keccak_f1600_alt(uint64_t a[S2N_BIGNUM_STATIC 25],const uint64_t rc[S2N_BIGNUM_STATIC 24]);
1123
1124// Batched 2-way Keccak-f1600 permutation for SHA3
1125// Inputs a[50], rc[24]; output a[50]
1126extern void sha3_keccak2_f1600(uint64_t a[S2N_BIGNUM_STATIC 50],const uint64_t rc[S2N_BIGNUM_STATIC 24]);
1127extern void sha3_keccak2_f1600_alt(uint64_t a[S2N_BIGNUM_STATIC 50],const uint64_t rc[S2N_BIGNUM_STATIC 24]);
1128
1129// Batched 4-way Keccak-f1600 permutation for SHA3
1130// Inputs a[100], rc[24]; output a[100]
1131extern void sha3_keccak4_f1600(uint64_t a[S2N_BIGNUM_STATIC 100],const uint64_t rc[S2N_BIGNUM_STATIC 24]);
1132extern void sha3_keccak4_f1600_alt(uint64_t a[S2N_BIGNUM_STATIC 100],const uint64_t rc[S2N_BIGNUM_STATIC 24]);
1133extern void sha3_keccak4_f1600_alt2(uint64_t a[S2N_BIGNUM_STATIC 100],const uint64_t rc[S2N_BIGNUM_STATIC 24]);
1134
1135// Point addition on CC curve SM2 in Montgomery-Jacobian coordinates
1136// Inputs p1[12], p2[12]; output p3[12]
1137extern void sm2_montjadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]);
1138extern void sm2_montjadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]);
1139
1140// Point doubling on CC curve SM2 in Montgomery-Jacobian coordinates
1141// Input p1[12]; output p3[12]
1142extern void sm2_montjdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]);
1143extern void sm2_montjdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]);
1144
1145// Point mixed addition on CC curve SM2 in Montgomery-Jacobian coordinates
1146// Inputs p1[12], p2[8]; output p3[12]
1147extern void sm2_montjmixadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]);
1148extern void sm2_montjmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]);
1149
1150// Montgomery-Jacobian form scalar multiplication for CC curve SM2
1151// Input scalar[4], point[12]; output res[12]
1152extern void sm2_montjscalarmul(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]);
1153extern void sm2_montjscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]);
829 1154
830// Reverse the bytes in a single word 1155// Reverse the bytes in a single word
831// Input a; output function return 1156// Input a; output function return
@@ -839,6 +1164,10 @@ extern uint64_t word_clz (uint64_t a);
839// Input a; output function return 1164// Input a; output function return
840extern uint64_t word_ctz (uint64_t a); 1165extern uint64_t word_ctz (uint64_t a);
841 1166
1167// Perform 59 "divstep" iterations and return signed matrix of updates
1168// Inputs d, f, g; output m[2][2] and function return
1169extern int64_t word_divstep59(int64_t m[2][2],int64_t d,uint64_t f,uint64_t g);
1170
842// Return maximum of two unsigned 64-bit words 1171// Return maximum of two unsigned 64-bit words
843// Inputs a, b; output function return 1172// Inputs a, b; output function return
844extern uint64_t word_max (uint64_t a, uint64_t b); 1173extern uint64_t word_max (uint64_t a, uint64_t b);
@@ -851,6 +1180,10 @@ extern uint64_t word_min (uint64_t a, uint64_t b);
851// Input a; output function return 1180// Input a; output function return
852extern uint64_t word_negmodinv (uint64_t a); 1181extern uint64_t word_negmodinv (uint64_t a);
853 1182
1183// Count number of set bits in a single 64-bit word (population count)
1184// Input a; output function return
1185extern uint64_t word_popcount (uint64_t a);
1186
854// Single-word reciprocal, 2^64 + ret = ceil(2^128/a) - 1 if MSB of "a" is set 1187// Single-word reciprocal, 2^64 + ret = ceil(2^128/a) - 1 if MSB of "a" is set
855// Input a; output function return 1188// Input a; output function return
856extern uint64_t word_recip (uint64_t a); 1189extern uint64_t word_recip (uint64_t a);
diff --git a/src/lib/libcrypto/bn/s2n_bignum_internal.h b/src/lib/libcrypto/bn/s2n_bignum_internal.h
index b82db7d019..37eebb4fd6 100644
--- a/src/lib/libcrypto/bn/s2n_bignum_internal.h
+++ b/src/lib/libcrypto/bn/s2n_bignum_internal.h
@@ -1,3 +1,5 @@
1// $OpenBSD: s2n_bignum_internal.h,v 1.5 2025/08/12 10:01:37 jsing Exp $
2//
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// 4//
3// Permission to use, copy, modify, and/or distribute this software for any 5// Permission to use, copy, modify, and/or distribute this software for any
@@ -14,14 +16,14 @@
14 16
15#ifdef __APPLE__ 17#ifdef __APPLE__
16# define S2N_BN_SYMBOL(NAME) _##NAME 18# define S2N_BN_SYMBOL(NAME) _##NAME
19# if defined(__AARCH64EL__) || defined(__ARMEL__)
20# define __LF %%
21# else
22# define __LF ;
23# endif
17#else 24#else
18# define S2N_BN_SYMBOL(name) name 25# define S2N_BN_SYMBOL(name) name
19#endif 26# define __LF ;
20
21#ifdef __CET__
22# include <cet.h>
23#else
24# define _CET_ENDBR
25#endif 27#endif
26 28
27#define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl S2N_BN_SYMBOL(name) 29#define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl S2N_BN_SYMBOL(name)
@@ -34,3 +36,24 @@
34#else 36#else
35# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ 37# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */
36#endif 38#endif
39
40// Enable indirect branch tracking support unless explicitly disabled
41// with -DNO_IBT. If the platform supports CET, simply inherit this from
42// the usual header (<cet.h>). Otherwise manually define _CET_ENDBR, used at each
43// x86 entry point, to be the ENDBR64 instruction, with an explicit byte
44// sequence for compilers/assemblers that don't know about it. Note that
45// it is safe to use ENDBR64 on all platforms, since the encoding is by
46// design interpreted as a NOP on all pre-CET x86_64 processors. The only
47// downside is a small increase in code size and potentially a modest
48// slowdown from executing one more instruction.
49
50#if NO_IBT
51# if defined(_CET_ENDBR)
52# error "The s2n-bignum build option NO_IBT was configured, but _CET_ENDBR is defined in this compilation unit. That is weird, so failing the build."
53# endif
54# define _CET_ENDBR
55#elif defined(__CET__)
56# include <cet.h>
57#elif !defined(_CET_ENDBR)
58# define _CET_ENDBR .byte 0xf3,0x0f,0x1e,0xfa
59#endif