diff options
Diffstat (limited to '')
31 files changed, 2988 insertions, 388 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S index 5fe4aae7a1..1d4e6d08ef 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: bignum_add.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,9 +18,8 @@ | |||
16 | // Add, z := x + y | 18 | // Add, z := x + y |
17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 19 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
18 | // | 20 | // |
19 | // extern uint64_t bignum_add | 21 | // extern uint64_t bignum_add(uint64_t p, uint64_t *z, uint64_t m, |
20 | // (uint64_t p, uint64_t *z, | 22 | // const uint64_t *x, uint64_t n, const uint64_t *y); |
21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
22 | // | 23 | // |
23 | // Does the z := x + y operation, truncating modulo p words in general and | 24 | // Does the z := x + y operation, truncating modulo p words in general and |
24 | // returning a top carry (0 or 1) in the p'th place, only adding the input | 25 | // returning a top carry (0 or 1) in the p'th place, only adding the input |
@@ -49,7 +50,7 @@ | |||
49 | 50 | ||
50 | 51 | ||
51 | S2N_BN_SYMBOL(bignum_add): | 52 | S2N_BN_SYMBOL(bignum_add): |
52 | _CET_ENDBR | 53 | _CET_ENDBR |
53 | 54 | ||
54 | #if WINDOWS_ABI | 55 | #if WINDOWS_ABI |
55 | push rdi | 56 | push rdi |
@@ -75,7 +76,7 @@ S2N_BN_SYMBOL(bignum_add): | |||
75 | cmp p, n | 76 | cmp p, n |
76 | cmovc n, p | 77 | cmovc n, p |
77 | cmp m, n | 78 | cmp m, n |
78 | jc ylonger | 79 | jc bignum_add_ylonger |
79 | 80 | ||
80 | // The case where x is longer or of the same size (p >= m >= n) | 81 | // The case where x is longer or of the same size (p >= m >= n) |
81 | 82 | ||
@@ -83,27 +84,27 @@ S2N_BN_SYMBOL(bignum_add): | |||
83 | sub m, n | 84 | sub m, n |
84 | inc m | 85 | inc m |
85 | test n, n | 86 | test n, n |
86 | jz xtest | 87 | jz bignum_add_xtest |
87 | xmainloop: | 88 | bignum_add_xmainloop: |
88 | mov a, [x+8*i] | 89 | mov a, [x+8*i] |
89 | adc a, [y+8*i] | 90 | adc a, [y+8*i] |
90 | mov [z+8*i],a | 91 | mov [z+8*i],a |
91 | inc i | 92 | inc i |
92 | dec n | 93 | dec n |
93 | jnz xmainloop | 94 | jnz bignum_add_xmainloop |
94 | jmp xtest | 95 | jmp bignum_add_xtest |
95 | xtoploop: | 96 | bignum_add_xtoploop: |
96 | mov a, [x+8*i] | 97 | mov a, [x+8*i] |
97 | adc a, 0 | 98 | adc a, 0 |
98 | mov [z+8*i],a | 99 | mov [z+8*i],a |
99 | inc i | 100 | inc i |
100 | xtest: | 101 | bignum_add_xtest: |
101 | dec m | 102 | dec m |
102 | jnz xtoploop | 103 | jnz bignum_add_xtoploop |
103 | mov ashort, 0 | 104 | mov ashort, 0 |
104 | adc a, 0 | 105 | adc a, 0 |
105 | test p, p | 106 | test p, p |
106 | jnz tails | 107 | jnz bignum_add_tails |
107 | #if WINDOWS_ABI | 108 | #if WINDOWS_ABI |
108 | pop rsi | 109 | pop rsi |
109 | pop rdi | 110 | pop rdi |
@@ -112,30 +113,30 @@ xtest: | |||
112 | 113 | ||
113 | // The case where y is longer (p >= n > m) | 114 | // The case where y is longer (p >= n > m) |
114 | 115 | ||
115 | ylonger: | 116 | bignum_add_ylonger: |
116 | 117 | ||
117 | sub p, n | 118 | sub p, n |
118 | sub n, m | 119 | sub n, m |
119 | test m, m | 120 | test m, m |
120 | jz ytoploop | 121 | jz bignum_add_ytoploop |
121 | ymainloop: | 122 | bignum_add_ymainloop: |
122 | mov a, [x+8*i] | 123 | mov a, [x+8*i] |
123 | adc a, [y+8*i] | 124 | adc a, [y+8*i] |
124 | mov [z+8*i],a | 125 | mov [z+8*i],a |
125 | inc i | 126 | inc i |
126 | dec m | 127 | dec m |
127 | jnz ymainloop | 128 | jnz bignum_add_ymainloop |
128 | ytoploop: | 129 | bignum_add_ytoploop: |
129 | mov a, [y+8*i] | 130 | mov a, [y+8*i] |
130 | adc a, 0 | 131 | adc a, 0 |
131 | mov [z+8*i],a | 132 | mov [z+8*i],a |
132 | inc i | 133 | inc i |
133 | dec n | 134 | dec n |
134 | jnz ytoploop | 135 | jnz bignum_add_ytoploop |
135 | mov ashort, 0 | 136 | mov ashort, 0 |
136 | adc a, 0 | 137 | adc a, 0 |
137 | test p, p | 138 | test p, p |
138 | jnz tails | 139 | jnz bignum_add_tails |
139 | #if WINDOWS_ABI | 140 | #if WINDOWS_ABI |
140 | pop rsi | 141 | pop rsi |
141 | pop rdi | 142 | pop rdi |
@@ -144,16 +145,16 @@ ytoploop: | |||
144 | 145 | ||
145 | // Adding a non-trivial tail, when p > max(m,n) | 146 | // Adding a non-trivial tail, when p > max(m,n) |
146 | 147 | ||
147 | tails: | 148 | bignum_add_tails: |
148 | mov [z+8*i],a | 149 | mov [z+8*i],a |
149 | xor a, a | 150 | xor a, a |
150 | jmp tail | 151 | jmp bignum_add_tail |
151 | tailloop: | 152 | bignum_add_tailloop: |
152 | mov [z+8*i],a | 153 | mov [z+8*i],a |
153 | tail: | 154 | bignum_add_tail: |
154 | inc i | 155 | inc i |
155 | dec p | 156 | dec p |
156 | jnz tailloop | 157 | jnz bignum_add_tailloop |
157 | #if WINDOWS_ABI | 158 | #if WINDOWS_ABI |
158 | pop rsi | 159 | pop rsi |
159 | pop rdi | 160 | pop rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S index 25ba17bce2..a611919603 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: bignum_cmadd.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,8 +18,8 @@ | |||
16 | // Multiply-add with single-word multiplier, z := z + c * y | 18 | // Multiply-add with single-word multiplier, z := z + c * y |
17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 19 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
18 | // | 20 | // |
19 | // extern uint64_t bignum_cmadd | 21 | // extern uint64_t bignum_cmadd(uint64_t k, uint64_t *z, uint64_t c, uint64_t n, |
20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 22 | // const uint64_t *y); |
21 | // | 23 | // |
22 | // Does the "z := z + c * y" operation where y is n digits, result z is p. | 24 | // Does the "z := z + c * y" operation where y is n digits, result z is p. |
23 | // Truncates the result in general. | 25 | // Truncates the result in general. |
@@ -54,7 +56,7 @@ | |||
54 | 56 | ||
55 | 57 | ||
56 | S2N_BN_SYMBOL(bignum_cmadd): | 58 | S2N_BN_SYMBOL(bignum_cmadd): |
57 | _CET_ENDBR | 59 | _CET_ENDBR |
58 | 60 | ||
59 | #if WINDOWS_ABI | 61 | #if WINDOWS_ABI |
60 | push rdi | 62 | push rdi |
@@ -82,7 +84,7 @@ S2N_BN_SYMBOL(bignum_cmadd): | |||
82 | 84 | ||
83 | xor h, h | 85 | xor h, h |
84 | test n, n | 86 | test n, n |
85 | jz end | 87 | jz bignum_cmadd_end |
86 | 88 | ||
87 | // Move c into a safer register as multiplies overwrite rdx | 89 | // Move c into a safer register as multiplies overwrite rdx |
88 | 90 | ||
@@ -96,11 +98,11 @@ S2N_BN_SYMBOL(bignum_cmadd): | |||
96 | mov h, rdx | 98 | mov h, rdx |
97 | mov ishort, 1 | 99 | mov ishort, 1 |
98 | dec n | 100 | dec n |
99 | jz hightail | 101 | jz bignum_cmadd_hightail |
100 | 102 | ||
101 | // Main loop, where we always have CF + previous high part h to add in | 103 | // Main loop, where we always have CF + previous high part h to add in |
102 | 104 | ||
103 | loop: | 105 | bignum_cmadd_loop: |
104 | adc h, [z+8*i] | 106 | adc h, [z+8*i] |
105 | sbb r, r | 107 | sbb r, r |
106 | mov rax, [x+8*i] | 108 | mov rax, [x+8*i] |
@@ -111,36 +113,36 @@ loop: | |||
111 | mov h, rdx | 113 | mov h, rdx |
112 | inc i | 114 | inc i |
113 | dec n | 115 | dec n |
114 | jnz loop | 116 | jnz bignum_cmadd_loop |
115 | 117 | ||
116 | hightail: | 118 | bignum_cmadd_hightail: |
117 | adc h, 0 | 119 | adc h, 0 |
118 | 120 | ||
119 | // Propagate the carry all the way to the end with h as extra carry word | 121 | // Propagate the carry all the way to the end with h as extra carry word |
120 | 122 | ||
121 | tail: | 123 | bignum_cmadd_tail: |
122 | test p, p | 124 | test p, p |
123 | jz end | 125 | jz bignum_cmadd_end |
124 | 126 | ||
125 | add [z+8*i], h | 127 | add [z+8*i], h |
126 | mov hshort, 0 | 128 | mov hshort, 0 |
127 | inc i | 129 | inc i |
128 | dec p | 130 | dec p |
129 | jz highend | 131 | jz bignum_cmadd_highend |
130 | 132 | ||
131 | tloop: | 133 | bignum_cmadd_tloop: |
132 | adc [z+8*i], h | 134 | adc [z+8*i], h |
133 | inc i | 135 | inc i |
134 | dec p | 136 | dec p |
135 | jnz tloop | 137 | jnz bignum_cmadd_tloop |
136 | 138 | ||
137 | highend: | 139 | bignum_cmadd_highend: |
138 | 140 | ||
139 | adc h, 0 | 141 | adc h, 0 |
140 | 142 | ||
141 | // Return the high/carry word | 143 | // Return the high/carry word |
142 | 144 | ||
143 | end: | 145 | bignum_cmadd_end: |
144 | mov rax, h | 146 | mov rax, h |
145 | 147 | ||
146 | pop rbx | 148 | pop rbx |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S index 12f785d63a..eb71d9da44 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: bignum_cmul.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,8 +18,8 @@ | |||
16 | // Multiply by a single word, z := c * y | 18 | // Multiply by a single word, z := c * y |
17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 19 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
18 | // | 20 | // |
19 | // extern uint64_t bignum_cmul | 21 | // extern uint64_t bignum_cmul(uint64_t k, uint64_t *z, uint64_t c, uint64_t n, |
20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 22 | // const uint64_t *y); |
21 | // | 23 | // |
22 | // Does the "z := c * y" operation where y is n digits, result z is p. | 24 | // Does the "z := c * y" operation where y is n digits, result z is p. |
23 | // Truncates the result in general unless p >= n + 1. | 25 | // Truncates the result in general unless p >= n + 1. |
@@ -51,7 +53,7 @@ | |||
51 | 53 | ||
52 | 54 | ||
53 | S2N_BN_SYMBOL(bignum_cmul): | 55 | S2N_BN_SYMBOL(bignum_cmul): |
54 | _CET_ENDBR | 56 | _CET_ENDBR |
55 | 57 | ||
56 | #if WINDOWS_ABI | 58 | #if WINDOWS_ABI |
57 | push rdi | 59 | push rdi |
@@ -76,7 +78,7 @@ S2N_BN_SYMBOL(bignum_cmul): | |||
76 | xor h, h | 78 | xor h, h |
77 | xor i, i | 79 | xor i, i |
78 | test n, n | 80 | test n, n |
79 | jz tail | 81 | jz bignum_cmul_tail |
80 | 82 | ||
81 | // Move c into a safer register as multiplies overwrite rdx | 83 | // Move c into a safer register as multiplies overwrite rdx |
82 | 84 | ||
@@ -90,11 +92,11 @@ S2N_BN_SYMBOL(bignum_cmul): | |||
90 | mov h, rdx | 92 | mov h, rdx |
91 | inc i | 93 | inc i |
92 | cmp i, n | 94 | cmp i, n |
93 | jz tail | 95 | jz bignum_cmul_tail |
94 | 96 | ||
95 | // Main loop doing the multiplications | 97 | // Main loop doing the multiplications |
96 | 98 | ||
97 | loop: | 99 | bignum_cmul_loop: |
98 | mov rax, [x+8*i] | 100 | mov rax, [x+8*i] |
99 | mul c | 101 | mul c |
100 | add rax, h | 102 | add rax, h |
@@ -103,28 +105,28 @@ loop: | |||
103 | mov h, rdx | 105 | mov h, rdx |
104 | inc i | 106 | inc i |
105 | cmp i, n | 107 | cmp i, n |
106 | jc loop | 108 | jc bignum_cmul_loop |
107 | 109 | ||
108 | // Add a tail when the destination is longer | 110 | // Add a tail when the destination is longer |
109 | 111 | ||
110 | tail: | 112 | bignum_cmul_tail: |
111 | cmp i, p | 113 | cmp i, p |
112 | jnc end | 114 | jnc bignum_cmul_end |
113 | mov [z+8*i], h | 115 | mov [z+8*i], h |
114 | xor h, h | 116 | xor h, h |
115 | inc i | 117 | inc i |
116 | cmp i, p | 118 | cmp i, p |
117 | jnc end | 119 | jnc bignum_cmul_end |
118 | 120 | ||
119 | tloop: | 121 | bignum_cmul_tloop: |
120 | mov [z+8*i], h | 122 | mov [z+8*i], h |
121 | inc i | 123 | inc i |
122 | cmp i, p | 124 | cmp i, p |
123 | jc tloop | 125 | jc bignum_cmul_tloop |
124 | 126 | ||
125 | // Return the high/carry word | 127 | // Return the high/carry word |
126 | 128 | ||
127 | end: | 129 | bignum_cmul_end: |
128 | mov rax, h | 130 | mov rax, h |
129 | 131 | ||
130 | #if WINDOWS_ABI | 132 | #if WINDOWS_ABI |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_modadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_modadd.S new file mode 100644 index 0000000000..baf27fdc7f --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_modadd.S | |||
@@ -0,0 +1,112 @@ | |||
1 | // $OpenBSD: bignum_modadd.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
2 | // | ||
3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
4 | // | ||
5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
6 | // purpose with or without fee is hereby granted, provided that the above | ||
7 | // copyright notice and this permission notice appear in all copies. | ||
8 | // | ||
9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | |||
17 | // ---------------------------------------------------------------------------- | ||
18 | // Add modulo m, z := (x + y) mod m, assuming x and y are already reduced modulo m (x, y < m) | |
19 | // Inputs x[k], y[k], m[k]; output z[k] | ||
20 | // | ||
21 | // extern void bignum_modadd(uint64_t k, uint64_t *z, const uint64_t *x, | ||
22 | // const uint64_t *y, const uint64_t *m); | ||
23 | // | ||
24 | // Standard x86-64 ABI: RDI = k, RSI = z, RDX = x, RCX = y, R8 = m | ||
25 | // Microsoft x64 ABI: RCX = k, RDX = z, R8 = x, R9 = y, [RSP+40] = m | ||
26 | // ---------------------------------------------------------------------------- | ||
27 | |||
28 | #include "s2n_bignum_internal.h" | ||
29 | |||
30 | .intel_syntax noprefix | ||
31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_modadd) | ||
32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_modadd) | ||
33 | .text | ||
34 | |||
35 | #define k rdi | ||
36 | #define z rsi | ||
37 | #define x rdx | ||
38 | #define y rcx | ||
39 | #define m r8 | ||
40 | #define i r9 | ||
41 | #define j r10 | ||
42 | #define a rax | ||
43 | #define c r11 | ||
44 | |||
45 | S2N_BN_SYMBOL(bignum_modadd): | |
46 | _CET_ENDBR | |
47 | |
48 | #if WINDOWS_ABI | |
49 | push rdi | |
50 | push rsi | |
51 | mov rdi, rcx | |
52 | mov rsi, rdx | |
53 | mov rdx, r8 | |
54 | mov rcx, r9 | |
55 | mov r8, [rsp+56] | |
56 | #endif | |
57 | |
58 | // If k = 0 there are no words to process, so jump straight to the exit | |
59 | |
60 | test k, k | |
61 | jz bignum_modadd_end | |
62 | |
63 | // First just add (c::z) := x + y; the xor instructions clear CF, inc/dec leave it intact across iterations, and c catches the carry out of the top word | |
64 | |
65 | xor c, c | |
66 | mov j, k | |
67 | xor i, i | |
68 | bignum_modadd_addloop: | |
69 | mov a, [x+8*i] | |
70 | adc a, [y+8*i] | |
71 | mov [z+8*i], a | |
72 | inc i | |
73 | dec j | |
74 | jnz bignum_modadd_addloop | |
75 | adc c, 0 | |
76 | |
77 | // Now do a comparison subtraction (c::z) - m, discarding the differences and keeping only the borrow; sbb c, 0 then not c turn it into the mask for (c::z) >= m | |
78 | |
79 | mov j, k | |
80 | xor i, i | |
81 | bignum_modadd_cmploop: | |
82 | mov a, [z+8*i] | |
83 | sbb a, [m+8*i] | |
84 | inc i | |
85 | dec j | |
86 | jnz bignum_modadd_cmploop | |
87 | sbb c, 0 | |
88 | not c | |
89 | |
90 | // Now do a masked subtraction z := z - [c] * m; j carries the borrow between words (neg j reloads CF, sbb j, j saves it) because cmp i, k overwrites CF | |
91 | |
92 | xor i, i | |
93 | bignum_modadd_subloop: | |
94 | mov a, [m+8*i] | |
95 | and a, c | |
96 | neg j | |
97 | sbb [z+8*i], a | |
98 | sbb j, j | |
99 | inc i | |
100 | cmp i, k | |
101 | jc bignum_modadd_subloop | |
102 | |
103 | bignum_modadd_end: | |
104 | #if WINDOWS_ABI | |
105 | pop rsi | |
106 | pop rdi | |
107 | #endif | |
108 | ret | |
109 | |||
110 | #if defined(__linux__) && defined(__ELF__) | ||
111 | .section .note.GNU-stack,"",%progbits | ||
112 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_modsub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_modsub.S new file mode 100644 index 0000000000..63b3230e35 --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_modsub.S | |||
@@ -0,0 +1,99 @@ | |||
1 | // $OpenBSD: bignum_modsub.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
2 | // | ||
3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
4 | // | ||
5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
6 | // purpose with or without fee is hereby granted, provided that the above | ||
7 | // copyright notice and this permission notice appear in all copies. | ||
8 | // | ||
9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | |||
17 | // ---------------------------------------------------------------------------- | ||
18 | // Subtract modulo m, z := (x - y) mod m, assuming x and y are already reduced modulo m (x, y < m) | |
19 | // Inputs x[k], y[k], m[k]; output z[k] | ||
20 | // | ||
21 | // extern void bignum_modsub(uint64_t k, uint64_t *z, const uint64_t *x, | ||
22 | // const uint64_t *y, const uint64_t *m); | ||
23 | // | ||
24 | // Standard x86-64 ABI: RDI = k, RSI = z, RDX = x, RCX = y, R8 = m | ||
25 | // Microsoft x64 ABI: RCX = k, RDX = z, R8 = x, R9 = y, [RSP+40] = m | ||
26 | // ---------------------------------------------------------------------------- | ||
27 | |||
28 | #include "s2n_bignum_internal.h" | ||
29 | |||
30 | .intel_syntax noprefix | ||
31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_modsub) | ||
32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_modsub) | ||
33 | .text | ||
34 | |||
35 | #define k rdi | ||
36 | #define z rsi | ||
37 | #define x rdx | ||
38 | #define y rcx | ||
39 | #define m r8 | ||
40 | #define i r9 | ||
41 | #define j r10 | ||
42 | #define a rax | ||
43 | #define c r11 | ||
44 | |||
45 | S2N_BN_SYMBOL(bignum_modsub): | |
46 | _CET_ENDBR | |
47 | |
48 | #if WINDOWS_ABI | |
49 | push rdi | |
50 | push rsi | |
51 | mov rdi, rcx | |
52 | mov rsi, rdx | |
53 | mov rdx, r8 | |
54 | mov rcx, r9 | |
55 | mov r8, [rsp+56] | |
56 | #endif | |
57 | |
58 | // If k = 0 there are no words to process, so jump straight to the exit | |
59 | |
60 | test k, k | |
61 | jz bignum_modsub_end | |
62 | |
63 | // Subtract z := x - y with the borrow chained through CF, then sbb c, c records the final borrow as a mask: all 1s when x - y < 0, else 0 | |
64 | |
65 | xor c, c | |
66 | mov j, k | |
67 | xor i, i | |
68 | bignum_modsub_subloop: | |
69 | mov a, [x+8*i] | |
70 | sbb a, [y+8*i] | |
71 | mov [z+8*i], a | |
72 | inc i | |
73 | dec j | |
74 | jnz bignum_modsub_subloop | |
75 | sbb c, c | |
76 | |
77 | // Now do a masked addition z := z + [c] * m; j carries the carry between words (neg j reloads CF, sbb j, j saves it) because cmp i, k overwrites CF | |
78 | |
79 | xor i, i | |
80 | bignum_modsub_addloop: | |
81 | mov a, [m+8*i] | |
82 | and a, c | |
83 | neg j | |
84 | adc [z+8*i], a | |
85 | sbb j, j | |
86 | inc i | |
87 | cmp i, k | |
88 | jc bignum_modsub_addloop | |
89 | |
90 | bignum_modsub_end: | |
91 | #if WINDOWS_ABI | |
92 | pop rsi | |
93 | pop rdi | |
94 | #endif | |
95 | ret | |
96 | |||
97 | #if defined(__linux__) && defined(__ELF__) | ||
98 | .section .note.GNU-stack,"",%progbits | ||
99 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S index a3552679a2..538cce9af7 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: bignum_mul.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,9 +18,8 @@ | |||
16 | // Multiply z := x * y | 18 | // Multiply z := x * y |
17 | // Inputs x[m], y[n]; output z[k] | 19 | // Inputs x[m], y[n]; output z[k] |
18 | // | 20 | // |
19 | // extern void bignum_mul | 21 | // extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, |
20 | // (uint64_t k, uint64_t *z, | 22 | // uint64_t n, const uint64_t *y); |
21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
22 | // | 23 | // |
23 | // Does the "z := x * y" operation where x is m digits, y is n, result z is k. | 24 | // Does the "z := x * y" operation where x is m digits, y is n, result z is k. |
24 | // Truncates the result in general unless k >= m + n | 25 | // Truncates the result in general unless k >= m + n |
@@ -59,7 +60,7 @@ | |||
59 | 60 | ||
60 | 61 | ||
61 | S2N_BN_SYMBOL(bignum_mul): | 62 | S2N_BN_SYMBOL(bignum_mul): |
62 | _CET_ENDBR | 63 | _CET_ENDBR |
63 | 64 | ||
64 | #if WINDOWS_ABI | 65 | #if WINDOWS_ABI |
65 | push rdi | 66 | push rdi |
@@ -88,7 +89,7 @@ S2N_BN_SYMBOL(bignum_mul): | |||
88 | // If we did a multiply-add variant, however, then we could | 89 | // If we did a multiply-add variant, however, then we could |
89 | 90 | ||
90 | test p, p | 91 | test p, p |
91 | jz end | 92 | jz bignum_mul_end |
92 | 93 | ||
93 | // Set initial 2-part sum to zero (we zero c inside the body) | 94 | // Set initial 2-part sum to zero (we zero c inside the body) |
94 | 95 | ||
@@ -99,7 +100,7 @@ S2N_BN_SYMBOL(bignum_mul): | |||
99 | 100 | ||
100 | xor k, k | 101 | xor k, k |
101 | 102 | ||
102 | outerloop: | 103 | bignum_mul_outerloop: |
103 | 104 | ||
104 | // Zero our carry term first; we eventually want it and a zero is useful now | 105 | // Zero our carry term first; we eventually want it and a zero is useful now |
105 | // Set a = max 0 (k + 1 - n), i = min (k + 1) m | 106 | // Set a = max 0 (k + 1 - n), i = min (k + 1) m |
@@ -125,11 +126,11 @@ outerloop: | |||
125 | mov d, k | 126 | mov d, k |
126 | sub d, i | 127 | sub d, i |
127 | sub i, a | 128 | sub i, a |
128 | jbe innerend | 129 | jbe bignum_mul_innerend |
129 | lea x,[rcx+8*a] | 130 | lea x,[rcx+8*a] |
130 | lea y,[r9+8*d-8] | 131 | lea y,[r9+8*d-8] |
131 | 132 | ||
132 | innerloop: | 133 | bignum_mul_innerloop: |
133 | mov rax, [y+8*i] | 134 | mov rax, [y+8*i] |
134 | mul QWORD PTR [x] | 135 | mul QWORD PTR [x] |
135 | add x, 8 | 136 | add x, 8 |
@@ -137,9 +138,9 @@ innerloop: | |||
137 | adc h, rdx | 138 | adc h, rdx |
138 | adc c, 0 | 139 | adc c, 0 |
139 | dec i | 140 | dec i |
140 | jnz innerloop | 141 | jnz bignum_mul_innerloop |
141 | 142 | ||
142 | innerend: | 143 | bignum_mul_innerend: |
143 | 144 | ||
144 | mov [z], l | 145 | mov [z], l |
145 | mov l, h | 146 | mov l, h |
@@ -147,9 +148,9 @@ innerend: | |||
147 | add z, 8 | 148 | add z, 8 |
148 | 149 | ||
149 | cmp k, p | 150 | cmp k, p |
150 | jc outerloop | 151 | jc bignum_mul_outerloop |
151 | 152 | ||
152 | end: | 153 | bignum_mul_end: |
153 | pop r15 | 154 | pop r15 |
154 | pop r14 | 155 | pop r14 |
155 | pop r13 | 156 | pop r13 |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8.S new file mode 100644 index 0000000000..d6ad514020 --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8.S | |||
@@ -0,0 +1,187 @@ | |||
1 | // $OpenBSD: bignum_mul_4_8.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
2 | // | ||
3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
4 | // | ||
5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
6 | // purpose with or without fee is hereby granted, provided that the above | ||
7 | // copyright notice and this permission notice appear in all copies. | ||
8 | // | ||
9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | |||
17 | // ---------------------------------------------------------------------------- | ||
18 | // Multiply z := x * y | ||
19 | // Inputs x[4], y[4]; output z[8] | ||
20 | // | ||
21 | // extern void bignum_mul_4_8(uint64_t z[static 8], const uint64_t x[static 4], | ||
22 | // const uint64_t y[static 4]); | ||
23 | // | ||
24 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | ||
25 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | ||
26 | // ---------------------------------------------------------------------------- | ||
27 | |||
28 | #include "s2n_bignum_internal.h" | ||
29 | |||
30 | .intel_syntax noprefix | ||
31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_4_8) | ||
32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_4_8) | ||
33 | .text | ||
34 | |||
35 | // z and x already arrive in these registers under the standard ABI, so they are used in place | |
36 | |||
37 | #define z rdi | ||
38 | #define x rsi | ||
39 | |||
40 | // Copied in or set up | ||
41 | |||
42 | #define y rcx | ||
43 | |||
44 | // A zero register | ||
45 | |||
46 | #define zero rbp | ||
47 | #define zeroe ebp | ||
48 | |||
49 | // mulpadd arg1,arg2: multiply x[arg2] by rdx (mulx: high half to rbx, low half to rax), then add the low half on the CF chain (adcx) and the high half on the OF chain (adox) | |
50 | // into two adjacent registers of the rotating window r8..r11 chosen by (arg1 + arg2) % 4; the .if chain stands in for indexed operands reg[i], reg[i+1] | |
51 | |
52 | .macro mulpadd arg1,arg2 | |
53 | mulx rbx, rax, [x+8*\arg2] | |
54 | .if ((\arg1 + \arg2) % 4 == 0) | |
55 | adcx r8, rax | |
56 | adox r9, rbx | |
57 | .elseif ((\arg1 + \arg2) % 4 == 1) | |
58 | adcx r9, rax | |
59 | adox r10, rbx | |
60 | .elseif ((\arg1 + \arg2) % 4 == 2) | |
61 | adcx r10, rax | |
62 | adox r11, rbx | |
63 | .elseif ((\arg1 + \arg2) % 4 == 3) | |
64 | adcx r11, rax | |
65 | adox r8, rbx | |
66 | .endif | |
67 | |
68 | .endm | |
69 | |||
70 | |||
71 | // Add in the whole j'th row (j = arg1): load rdx = y[j], clear CF and OF with the xor, accumulate x[0..3] * rdx into the register window, and store the finished low word to z[j]; the last product's high half absorbs both leftover carries | |
72 | |
73 | .macro addrow arg1 | |
74 | mov rdx, [y+8*\arg1] | |
75 | xor zeroe, zeroe | |
76 | |
77 | mulpadd \arg1, 0 | |
78 | |
79 | .if (\arg1 % 4 == 0) | |
80 | mov [z+8*\arg1],r8 | |
81 | .elseif (\arg1 % 4 == 1) | |
82 | mov [z+8*\arg1],r9 | |
83 | .elseif (\arg1 % 4 == 2) | |
84 | mov [z+8*\arg1],r10 | |
85 | .elseif (\arg1 % 4 == 3) | |
86 | mov [z+8*\arg1],r11 | |
87 | .endif | |
88 | |
89 | mulpadd \arg1, 1 | |
90 | mulpadd \arg1, 2 | |
91 | |
92 | .if (\arg1 % 4 == 0) | |
93 | mulx r8, rax, [x+24] | |
94 | adcx r11, rax | |
95 | adox r8, zero | |
96 | adcx r8, zero | |
97 | .elseif (\arg1 % 4 == 1) | |
98 | mulx r9, rax, [x+24] | |
99 | adcx r8, rax | |
100 | adox r9, zero | |
101 | adcx r9, zero | |
102 | .elseif (\arg1 % 4 == 2) | |
103 | mulx r10, rax, [x+24] | |
104 | adcx r9, rax | |
105 | adox r10, zero | |
106 | adcx r10, zero | |
107 | .elseif (\arg1 % 4 == 3) | |
108 | mulx r11, rax, [x+24] | |
109 | adcx r10, rax | |
110 | adox r11, zero | |
111 | adcx r11, zero | |
112 | .endif | |
113 | |
114 | .endm | |
115 | |||
116 | |||
117 | |||
118 | S2N_BN_SYMBOL(bignum_mul_4_8): | |
119 | _CET_ENDBR | |
120 | |
121 | #if WINDOWS_ABI | |
122 | push rdi | |
123 | push rsi | |
124 | mov rdi, rcx | |
125 | mov rsi, rdx | |
126 | mov rdx, r8 | |
127 | #endif | |
128 | |
129 | // Save rbp and rbx, which are used below as the zero register and as a mulx scratch register | |
130 | |
131 | push rbp | |
132 | push rbx | |
133 | |
134 | // Copy y out of rdx into rcx, since rdx is reloaded with each y[j] as the implicit mulx multiplicand | |
135 | |
136 | mov y, rdx | |
137 | |
138 | // Zero rbp (via ebp); the xor also clears CF, so the first adcx below sees no stale carry-in | |
139 | |
140 | xor zeroe, zeroe | |
141 | |
142 | // Do the zeroth row, which is a bit different: there is nothing to add into yet, | |
143 | // so write back the low word of x[0] * y[0] and chain the remaining products with adcx only, | |
144 | // leaving r9,r10,r11,r8 (low to high) holding words 1..4 of y[0] * x | |
145 | |
146 | mov rdx, [y] | |
147 | |
148 | mulx r9, r8, [x] | |
149 | mov [z], r8 | |
150 | |
151 | mulx r10, rbx, [x+8] | |
152 | adcx r9, rbx | |
153 | |
154 | mulx r11, rbx, [x+16] | |
155 | adcx r10, rbx | |
156 | |
157 | mulx r8, rbx, [x+24] | |
158 | adcx r11, rbx | |
159 | adcx r8, zero | |
160 | |
161 | // Now all the other rows in a uniform pattern; each addrow j also stores the finished word z[j] | |
162 | |
163 | addrow 1 | |
164 | addrow 2 | |
165 | addrow 3 | |
166 | |
167 | // Now write back the additional columns: after row 3 the window r8,r9,r10,r11 holds words 4..7 | |
168 | |
169 | mov [z+32], r8 | |
170 | mov [z+40], r9 | |
171 | mov [z+48], r10 | |
172 | mov [z+56], r11 | |
173 | |
174 | // Restore registers in the reverse order of the pushes and return | |
175 | |
176 | pop rbx | |
177 | pop rbp | |
178 | |
179 | #if WINDOWS_ABI | |
180 | pop rsi | |
181 | pop rdi | |
182 | #endif | |
183 | ret | |
184 | |||
185 | #if defined(__linux__) && defined(__ELF__) | ||
186 | .section .note.GNU-stack,"",%progbits | ||
187 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S index 70ff69e372..2592d1d658 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: bignum_mul_4_8_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,8 +18,8 @@ | |||
16 | // Multiply z := x * y | 18 | // Multiply z := x * y |
17 | // Inputs x[4], y[4]; output z[8] | 19 | // Inputs x[4], y[4]; output z[8] |
18 | // | 20 | // |
19 | // extern void bignum_mul_4_8_alt | 21 | // extern void bignum_mul_4_8_alt(uint64_t z[static 8], const uint64_t x[static 4], |
20 | // (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); | 22 | // const uint64_t y[static 4]); |
21 | // | 23 | // |
22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | 24 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y |
23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | 25 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y |
@@ -72,7 +74,7 @@ | |||
72 | adc h, rdx | 74 | adc h, rdx |
73 | 75 | ||
74 | S2N_BN_SYMBOL(bignum_mul_4_8_alt): | 76 | S2N_BN_SYMBOL(bignum_mul_4_8_alt): |
75 | _CET_ENDBR | 77 | _CET_ENDBR |
76 | 78 | ||
77 | #if WINDOWS_ABI | 79 | #if WINDOWS_ABI |
78 | push rdi | 80 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12.S new file mode 100644 index 0000000000..56cbdf06e0 --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12.S | |||
@@ -0,0 +1,223 @@ | |||
1 | // $OpenBSD: bignum_mul_6_12.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
2 | // | ||
3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
4 | // | ||
5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
6 | // purpose with or without fee is hereby granted, provided that the above | ||
7 | // copyright notice and this permission notice appear in all copies. | ||
8 | // | ||
9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | |||
17 | // ---------------------------------------------------------------------------- | ||
18 | // Multiply z := x * y | ||
19 | // Inputs x[6], y[6]; output z[12] | ||
20 | // | ||
21 | // extern void bignum_mul_6_12(uint64_t z[static 12], const uint64_t x[static 6], | ||
22 | // const uint64_t y[static 6]); | ||
23 | // | ||
24 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | ||
25 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | ||
26 | // ---------------------------------------------------------------------------- | ||
27 | |||
28 | #include "s2n_bignum_internal.h" | ||
29 | |||
30 | .intel_syntax noprefix | ||
31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_6_12) | ||
32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_6_12) | ||
33 | .text | ||
34 | |||
35 | // z and x are already in these registers on entry (the WINDOWS_ABI prologue moves them there) | ||
36 | |||
37 | #define z rdi | ||
38 | #define x rsi | ||
39 | |||
40 | // Copied in or set up | ||
41 | |||
42 | #define y rcx | ||
43 | |||
44 | // A zero register | ||
45 | |||
46 | #define zero rbp | ||
47 | #define zeroe ebp | ||
48 | |||
49 | // mulpadd j, i: add x[i] * rdx (x indexed by arg2) into the r8..r13 window at slot (i+j) % 6; | ||
50 | // low word via adcx, high word via adox into the next slot. The .if chain stands in for reg[i], reg[i+1]. | ||
51 | |||
52 | .macro mulpadd arg1,arg2 | ||
53 | mulx rbx, rax, [x+8*\arg2] | ||
54 | .if ((\arg1 + \arg2) % 6 == 0) | ||
55 | adcx r8, rax | ||
56 | adox r9, rbx | ||
57 | .elseif ((\arg1 + \arg2) % 6 == 1) | ||
58 | adcx r9, rax | ||
59 | adox r10, rbx | ||
60 | .elseif ((\arg1 + \arg2) % 6 == 2) | ||
61 | adcx r10, rax | ||
62 | adox r11, rbx | ||
63 | .elseif ((\arg1 + \arg2) % 6 == 3) | ||
64 | adcx r11, rax | ||
65 | adox r12, rbx | ||
66 | .elseif ((\arg1 + \arg2) % 6 == 4) | ||
67 | adcx r12, rax | ||
68 | adox r13, rbx | ||
69 | .elseif ((\arg1 + \arg2) % 6 == 5) | ||
70 | adcx r13, rax | ||
71 | adox r8, rbx | ||
72 | .endif | ||
73 | |||
74 | .endm | ||
75 | |||
76 | |||
77 | // addrow j: load rdx = y[j], clear rbp and both carry flags, add x[0..5] * y[j] into the window, storing the slot j % 6 word to z[j] after the x[0] product; the x[5] product puts its high word straight into that freed register and then absorbs the OF and CF chains | ||
78 | |||
79 | .macro addrow arg1 | ||
80 | mov rdx, [y+8*\arg1] | ||
81 | xor zeroe, zeroe | ||
82 | |||
83 | mulpadd \arg1, 0 | ||
84 | |||
85 | .if (\arg1 % 6 == 0) | ||
86 | mov [z+8*\arg1],r8 | ||
87 | .elseif (\arg1 % 6 == 1) | ||
88 | mov [z+8*\arg1],r9 | ||
89 | .elseif (\arg1 % 6 == 2) | ||
90 | mov [z+8*\arg1],r10 | ||
91 | .elseif (\arg1 % 6 == 3) | ||
92 | mov [z+8*\arg1],r11 | ||
93 | .elseif (\arg1 % 6 == 4) | ||
94 | mov [z+8*\arg1],r12 | ||
95 | .elseif (\arg1 % 6 == 5) | ||
96 | mov [z+8*\arg1],r13 | ||
97 | .endif | ||
98 | |||
99 | mulpadd \arg1, 1 | ||
100 | mulpadd \arg1, 2 | ||
101 | mulpadd \arg1, 3 | ||
102 | mulpadd \arg1, 4 | ||
103 | |||
104 | .if (\arg1 % 6 == 0) | ||
105 | mulx r8, rax, [x+40] | ||
106 | adcx r13, rax | ||
107 | adox r8, zero | ||
108 | adcx r8, zero | ||
109 | .elseif (\arg1 % 6 == 1) | ||
110 | mulx r9, rax, [x+40] | ||
111 | adcx r8, rax | ||
112 | adox r9, zero | ||
113 | adcx r9, zero | ||
114 | .elseif (\arg1 % 6 == 2) | ||
115 | mulx r10, rax, [x+40] | ||
116 | adcx r9, rax | ||
117 | adox r10, zero | ||
118 | adcx r10, zero | ||
119 | .elseif (\arg1 % 6 == 3) | ||
120 | mulx r11, rax, [x+40] | ||
121 | adcx r10, rax | ||
122 | adox r11, zero | ||
123 | adcx r11, zero | ||
124 | .elseif (\arg1 % 6 == 4) | ||
125 | mulx r12, rax, [x+40] | ||
126 | adcx r11, rax | ||
127 | adox r12, zero | ||
128 | adcx r12, zero | ||
129 | .elseif (\arg1 % 6 == 5) | ||
130 | mulx r13, rax, [x+40] | ||
131 | adcx r12, rax | ||
132 | adox r13, zero | ||
133 | adcx r13, zero | ||
134 | .endif | ||
135 | |||
136 | .endm | ||
137 | |||
138 | |||
139 | |||
140 | S2N_BN_SYMBOL(bignum_mul_6_12): | ||
141 | _CET_ENDBR | ||
142 | |||
143 | #if WINDOWS_ABI | ||
144 | push rdi | ||
145 | push rsi | ||
146 | mov rdi, rcx | ||
147 | mov rsi, rdx | ||
148 | mov rdx, r8 | ||
149 | #endif | ||
150 | |||
151 | // Save the callee-saved registers used below: rbp (zero), rbx (scratch), r12 and r13 (window words) | ||
152 | |||
153 | push rbp | ||
154 | push rbx | ||
155 | push r12 | ||
156 | push r13 | ||
157 | |||
158 | // Copy y out of rdx into rcx, since rdx is the implicit multiplicand of every mulx below | ||
159 | |||
160 | mov y, rdx | ||
161 | |||
162 | // Zero rbp (the zero register); the xor also clears CF and OF so the adcx chain starts with no carry-in | ||
163 | |||
164 | xor zeroe, zeroe | ||
165 | |||
166 | // Do the zeroth row, which is a bit different | ||
167 | // Write back the zero-zero product low word to z[0] and then accumulate, | ||
168 | // leaving words 1..6 of y[0] * x in r9,r10,r11,r12,r13,r8 respectively | ||
169 | |||
170 | mov rdx, [y] | ||
171 | |||
172 | mulx r9, r8, [x] | ||
173 | mov [z], r8 | ||
174 | |||
175 | mulx r10, rbx, [x+8] | ||
176 | adcx r9, rbx | ||
177 | |||
178 | mulx r11, rbx, [x+16] | ||
179 | adcx r10, rbx | ||
180 | |||
181 | mulx r12, rbx, [x+24] | ||
182 | adcx r11, rbx | ||
183 | |||
184 | mulx r13, rbx, [x+32] | ||
185 | adcx r12, rbx | ||
186 | |||
187 | mulx r8, rbx, [x+40] | ||
188 | adcx r13, rbx | ||
189 | adcx r8, zero | ||
190 | |||
191 | // Now rows 1..5 in a uniform pattern; each addrow j also stores z[j] | ||
192 | |||
193 | addrow 1 | ||
194 | addrow 2 | ||
195 | addrow 3 | ||
196 | addrow 4 | ||
197 | addrow 5 | ||
198 | |||
199 | // Now write back the six words still held in the window as z[6..11] | ||
200 | |||
201 | mov [z+48], r8 | ||
202 | mov [z+56], r9 | ||
203 | mov [z+64], r10 | ||
204 | mov [z+72], r11 | ||
205 | mov [z+80], r12 | ||
206 | mov [z+88], r13 | ||
207 | |||
208 | // Restore registers (reverse of the push order) and return | ||
209 | |||
210 | pop r13 | ||
211 | pop r12 | ||
212 | pop rbx | ||
213 | pop rbp | ||
214 | |||
215 | #if WINDOWS_ABI | ||
216 | pop rsi | ||
217 | pop rdi | ||
218 | #endif | ||
219 | ret | ||
220 | |||
221 | #if defined(__linux__) && defined(__ELF__) | ||
222 | .section .note.GNU-stack,"",%progbits | ||
223 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12_alt.S new file mode 100644 index 0000000000..077c52b38e --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12_alt.S | |||
@@ -0,0 +1,199 @@ | |||
1 | // $OpenBSD: bignum_mul_6_12_alt.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
2 | // | ||
3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
4 | // | ||
5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
6 | // purpose with or without fee is hereby granted, provided that the above | ||
7 | // copyright notice and this permission notice appear in all copies. | ||
8 | // | ||
9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | |||
17 | // ---------------------------------------------------------------------------- | ||
18 | // Multiply z := x * y | ||
19 | // Inputs x[6], y[6]; output z[12] | ||
20 | // | ||
21 | // extern void bignum_mul_6_12_alt(uint64_t z[static 12], | ||
22 | // const uint64_t x[static 6], | ||
23 | // const uint64_t y[static 6]); | ||
24 | // | ||
25 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | ||
26 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | ||
27 | // ---------------------------------------------------------------------------- | ||
28 | |||
29 | #include "s2n_bignum_internal.h" | ||
30 | |||
31 | .intel_syntax noprefix | ||
32 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_6_12_alt) | ||
33 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_6_12_alt) | ||
34 | .text | ||
35 | |||
36 | // z and x are already in these registers on entry (the WINDOWS_ABI prologue moves them there) | ||
37 | |||
38 | #define z rdi | ||
39 | #define x rsi | ||
40 | |||
41 | // This is moved from rdx to free it for muls | ||
42 | |||
43 | #define y rcx | ||
44 | |||
45 | // Other variables used as a rotating 3-word window to add terms to | ||
46 | |||
47 | #define t0 r8 | ||
48 | #define t1 r9 | ||
49 | #define t2 r10 | ||
50 | |||
51 | // combadd: the key step (c,h,l) += numa * numb as a 3-word accumulate; clobbers rax and rdx | ||
52 | |||
53 | #define combadd(c,h,l,numa,numb) \ | ||
54 | mov rax, numa; \ | ||
55 | mul QWORD PTR numb; \ | ||
56 | add l, rax; \ | ||
57 | adc h, rdx; \ | ||
58 | adc c, 0 | ||
59 | |||
60 | // combadz: minutely shorter form for when c = 0 on entry, so that "adc c, c" just sets c to the carry out of h | ||
61 | |||
62 | #define combadz(c,h,l,numa,numb) \ | ||
63 | mov rax, numa; \ | ||
64 | mul QWORD PTR numb; \ | ||
65 | add l, rax; \ | ||
66 | adc h, rdx; \ | ||
67 | adc c, c | ||
68 | |||
69 | // combads: two-word form (h,l) += numa * numb for when no carry out of h is expected; any such carry is not recorded | ||
70 | |||
71 | #define combads(h,l,numa,numb) \ | ||
72 | mov rax, numa; \ | ||
73 | mul QWORD PTR numb; \ | ||
74 | add l, rax; \ | ||
75 | adc h, rdx | ||
76 | |||
77 | S2N_BN_SYMBOL(bignum_mul_6_12_alt): | ||
78 | _CET_ENDBR | ||
79 | |||
80 | #if WINDOWS_ABI | ||
81 | push rdi | ||
82 | push rsi | ||
83 | mov rdi, rcx | ||
84 | mov rsi, rdx | ||
85 | mov rdx, r8 | ||
86 | #endif | ||
87 | |||
88 | // Copy y out of rdx into rcx, since every mul below overwrites rdx with the high product word | ||
89 | |||
90 | mov y, rdx | ||
91 | |||
92 | // Result term 0: x[0] * y[0]; low word goes to z[0], high word seeds t0, and t1 is cleared | ||
93 | |||
94 | mov rax, [x] | ||
95 | mul QWORD PTR [y] | ||
96 | |||
97 | mov [z], rax | ||
98 | mov t0, rdx | ||
99 | xor t1, t1 | ||
100 | |||
101 | // Result term 1: x[0] * y[1] + x[1] * y[0]; from here on term k sums the x[i] * y[j] with i + j = k | ||
102 | |||
103 | xor t2, t2 | ||
104 | combads(t1,t0,[x],[y+8]) | ||
105 | combadz(t2,t1,t0,[x+8],[y]) | ||
106 | mov [z+8], t0 | ||
107 | |||
108 | // Result term 2 | ||
109 | |||
110 | xor t0, t0 | ||
111 | combadz(t0,t2,t1,[x],[y+16]) | ||
112 | combadd(t0,t2,t1,[x+8],[y+8]) | ||
113 | combadd(t0,t2,t1,[x+16],[y]) | ||
114 | mov [z+16], t1 | ||
115 | |||
116 | // Result term 3 | ||
117 | |||
118 | xor t1, t1 | ||
119 | combadz(t1,t0,t2,[x],[y+24]) | ||
120 | combadd(t1,t0,t2,[x+8],[y+16]) | ||
121 | combadd(t1,t0,t2,[x+16],[y+8]) | ||
122 | combadd(t1,t0,t2,[x+24],[y]) | ||
123 | mov [z+24], t2 | ||
124 | |||
125 | // Result term 4 | ||
126 | |||
127 | xor t2, t2 | ||
128 | combadz(t2,t1,t0,[x],[y+32]) | ||
129 | combadd(t2,t1,t0,[x+8],[y+24]) | ||
130 | combadd(t2,t1,t0,[x+16],[y+16]) | ||
131 | combadd(t2,t1,t0,[x+24],[y+8]) | ||
132 | combadd(t2,t1,t0,[x+32],[y]) | ||
133 | mov [z+32], t0 | ||
134 | |||
135 | // Result term 5 | ||
136 | |||
137 | xor t0, t0 | ||
138 | combadz(t0,t2,t1,[x],[y+40]) | ||
139 | combadd(t0,t2,t1,[x+8],[y+32]) | ||
140 | combadd(t0,t2,t1,[x+16],[y+24]) | ||
141 | combadd(t0,t2,t1,[x+24],[y+16]) | ||
142 | combadd(t0,t2,t1,[x+32],[y+8]) | ||
143 | combadd(t0,t2,t1,[x+40],[y]) | ||
144 | mov [z+40], t1 | ||
145 | |||
146 | // Result term 6 | ||
147 | |||
148 | xor t1, t1 | ||
149 | combadz(t1,t0,t2,[x+8],[y+40]) | ||
150 | combadd(t1,t0,t2,[x+16],[y+32]) | ||
151 | combadd(t1,t0,t2,[x+24],[y+24]) | ||
152 | combadd(t1,t0,t2,[x+32],[y+16]) | ||
153 | combadd(t1,t0,t2,[x+40],[y+8]) | ||
154 | mov [z+48], t2 | ||
155 | |||
156 | // Result term 7 | ||
157 | |||
158 | xor t2, t2 | ||
159 | combadz(t2,t1,t0,[x+16],[y+40]) | ||
160 | combadd(t2,t1,t0,[x+24],[y+32]) | ||
161 | combadd(t2,t1,t0,[x+32],[y+24]) | ||
162 | combadd(t2,t1,t0,[x+40],[y+16]) | ||
163 | mov [z+56], t0 | ||
164 | |||
165 | // Result term 8 | ||
166 | |||
167 | xor t0, t0 | ||
168 | combadz(t0,t2,t1,[x+24],[y+40]) | ||
169 | combadd(t0,t2,t1,[x+32],[y+32]) | ||
170 | combadd(t0,t2,t1,[x+40],[y+24]) | ||
171 | mov [z+64], t1 | ||
172 | |||
173 | // Result term 9 | ||
174 | |||
175 | xor t1, t1 | ||
176 | combadz(t1,t0,t2,[x+32],[y+40]) | ||
177 | combadd(t1,t0,t2,[x+40],[y+32]) | ||
178 | mov [z+72], t2 | ||
179 | |||
180 | // Result term 10: the single product x[5] * y[5], added with the two-word form (no third word tracked) | ||
181 | |||
182 | combads(t1,t0,[x+40],[y+40]) | ||
183 | mov [z+80], t0 | ||
184 | |||
185 | // Result term 11: the top word left in t1 | ||
186 | |||
187 | mov [z+88], t1 | ||
188 | |||
189 | // Return | ||
190 | |||
191 | #if WINDOWS_ABI | ||
192 | pop rsi | ||
193 | pop rdi | ||
194 | #endif | ||
195 | ret | ||
196 | |||
197 | #if defined(__linux__) && defined(__ELF__) | ||
198 | .section .note.GNU-stack,"",%progbits | ||
199 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16.S new file mode 100644 index 0000000000..faa0196d8e --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16.S | |||
@@ -0,0 +1,273 @@ | |||
1 | // $OpenBSD: bignum_mul_8_16.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
2 | // | ||
3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
4 | // | ||
5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
6 | // purpose with or without fee is hereby granted, provided that the above | ||
7 | // copyright notice and this permission notice appear in all copies. | ||
8 | // | ||
9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | |||
17 | // ---------------------------------------------------------------------------- | ||
18 | // Multiply z := x * y | ||
19 | // Inputs x[8], y[8]; output z[16] | ||
20 | // | ||
21 | // extern void bignum_mul_8_16(uint64_t z[static 16], const uint64_t x[static 8], | ||
22 | // const uint64_t y[static 8]); | ||
23 | // | ||
24 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | ||
25 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | ||
26 | // ---------------------------------------------------------------------------- | ||
27 | |||
28 | #include "s2n_bignum_internal.h" | ||
29 | |||
30 | .intel_syntax noprefix | ||
31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_8_16) | ||
32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_8_16) | ||
33 | .text | ||
34 | |||
35 | // z and x are already in these registers on entry (the WINDOWS_ABI prologue moves them there) | ||
36 | |||
37 | #define z rdi | ||
38 | #define x rsi | ||
39 | |||
40 | // Copied in or set up | ||
41 | |||
42 | #define y rcx | ||
43 | |||
44 | // A zero register | ||
45 | |||
46 | #define zero rbp | ||
47 | #define zeroe ebp | ||
48 | |||
49 | // mulpadd i, j adds x[i] * rdx (now assumed = y[j]) into the r8..r15 window at slot (i+j) % 8: low word via adcx, high word via adox into the next slot (wrapping from r15 to r8) | ||
50 | |||
51 | .macro mulpadd arg1,arg2 | ||
52 | mulx rbx, rax, [x+8*\arg1] | ||
53 | .if ((\arg1 + \arg2) % 8 == 0) | ||
54 | adcx r8, rax | ||
55 | adox r9, rbx | ||
56 | .elseif ((\arg1 + \arg2) % 8 == 1) | ||
57 | adcx r9, rax | ||
58 | adox r10, rbx | ||
59 | .elseif ((\arg1 + \arg2) % 8 == 2) | ||
60 | adcx r10, rax | ||
61 | adox r11, rbx | ||
62 | .elseif ((\arg1 + \arg2) % 8 == 3) | ||
63 | adcx r11, rax | ||
64 | adox r12, rbx | ||
65 | .elseif ((\arg1 + \arg2) % 8 == 4) | ||
66 | adcx r12, rax | ||
67 | adox r13, rbx | ||
68 | .elseif ((\arg1 + \arg2) % 8 == 5) | ||
69 | adcx r13, rax | ||
70 | adox r14, rbx | ||
71 | .elseif ((\arg1 + \arg2) % 8 == 6) | ||
72 | adcx r14, rax | ||
73 | adox r15, rbx | ||
74 | .elseif ((\arg1 + \arg2) % 8 == 7) | ||
75 | adcx r15, rax | ||
76 | adox r8, rbx | ||
77 | .endif | ||
78 | |||
79 | .endm | ||
80 | |||
81 | // mulpade i, j adds x[i] * rdx (now assumed = y[j]) into the window at slot (i+j) % 8, | ||
82 | // but re-creates the top word: mulx writes the high word straight over the next slot, and adox with zero adds only the OF carry | ||
83 | |||
84 | .macro mulpade arg1,arg2 | ||
85 | .if ((\arg1 + \arg2) % 8 == 0) | ||
86 | mulx r9, rax, [x+8*\arg1] | ||
87 | adcx r8, rax | ||
88 | adox r9, zero | ||
89 | .elseif ((\arg1 + \arg2) % 8 == 1) | ||
90 | mulx r10, rax, [x+8*\arg1] | ||
91 | adcx r9, rax | ||
92 | adox r10, zero | ||
93 | .elseif ((\arg1 + \arg2) % 8 == 2) | ||
94 | mulx r11, rax, [x+8*\arg1] | ||
95 | adcx r10, rax | ||
96 | adox r11, zero | ||
97 | .elseif ((\arg1 + \arg2) % 8 == 3) | ||
98 | mulx r12, rax, [x+8*\arg1] | ||
99 | adcx r11, rax | ||
100 | adox r12, zero | ||
101 | .elseif ((\arg1 + \arg2) % 8 == 4) | ||
102 | mulx r13, rax, [x+8*\arg1] | ||
103 | adcx r12, rax | ||
104 | adox r13, zero | ||
105 | .elseif ((\arg1 + \arg2) % 8 == 5) | ||
106 | mulx r14, rax, [x+8*\arg1] | ||
107 | adcx r13, rax | ||
108 | adox r14, zero | ||
109 | .elseif ((\arg1 + \arg2) % 8 == 6) | ||
110 | mulx r15, rax, [x+8*\arg1] | ||
111 | adcx r14, rax | ||
112 | adox r15, zero | ||
113 | .elseif ((\arg1 + \arg2) % 8 == 7) | ||
114 | mulx r8, rax, [x+8*\arg1] | ||
115 | adcx r15, rax | ||
116 | adox r8, zero | ||
117 | .endif | ||
118 | |||
119 | .endm | ||
120 | |||
121 | // addrow j: load rdx = y[j], clear rbp and both carry flags, add x[0..7] * y[j] into the window, storing the slot j % 8 word to z[j] after the x[0] product; mulpade re-creates that slot as the new top word and the final adc folds the remaining CF into it | ||
122 | |||
123 | .macro addrow arg1 | ||
124 | mov rdx, [y+8*\arg1] | ||
125 | xor zeroe, zeroe | ||
126 | |||
127 | mulpadd 0, \arg1 | ||
128 | |||
129 | .if (\arg1 % 8 == 0) | ||
130 | mov [z+8*\arg1],r8 | ||
131 | .elseif (\arg1 % 8 == 1) | ||
132 | mov [z+8*\arg1],r9 | ||
133 | .elseif (\arg1 % 8 == 2) | ||
134 | mov [z+8*\arg1],r10 | ||
135 | .elseif (\arg1 % 8 == 3) | ||
136 | mov [z+8*\arg1],r11 | ||
137 | .elseif (\arg1 % 8 == 4) | ||
138 | mov [z+8*\arg1],r12 | ||
139 | .elseif (\arg1 % 8 == 5) | ||
140 | mov [z+8*\arg1],r13 | ||
141 | .elseif (\arg1 % 8 == 6) | ||
142 | mov [z+8*\arg1],r14 | ||
143 | .elseif (\arg1 % 8 == 7) | ||
144 | mov [z+8*\arg1],r15 | ||
145 | .endif | ||
146 | |||
147 | mulpadd 1, \arg1 | ||
148 | mulpadd 2, \arg1 | ||
149 | mulpadd 3, \arg1 | ||
150 | mulpadd 4, \arg1 | ||
151 | mulpadd 5, \arg1 | ||
152 | mulpadd 6, \arg1 | ||
153 | mulpade 7, \arg1 | ||
154 | |||
155 | .if (\arg1 % 8 == 0) | ||
156 | adc r8, zero | ||
157 | .elseif (\arg1 % 8 == 1) | ||
158 | adc r9, zero | ||
159 | .elseif (\arg1 % 8 == 2) | ||
160 | adc r10, zero | ||
161 | .elseif (\arg1 % 8 == 3) | ||
162 | adc r11, zero | ||
163 | .elseif (\arg1 % 8 == 4) | ||
164 | adc r12, zero | ||
165 | .elseif (\arg1 % 8 == 5) | ||
166 | adc r13, zero | ||
167 | .elseif (\arg1 % 8 == 6) | ||
168 | adc r14, zero | ||
169 | .elseif (\arg1 % 8 == 7) | ||
170 | adc r15, zero | ||
171 | .endif | ||
172 | |||
173 | .endm | ||
174 | |||
175 | |||
176 | S2N_BN_SYMBOL(bignum_mul_8_16): | ||
177 | _CET_ENDBR | ||
178 | |||
179 | #if WINDOWS_ABI | ||
180 | push rdi | ||
181 | push rsi | ||
182 | mov rdi, rcx | ||
183 | mov rsi, rdx | ||
184 | mov rdx, r8 | ||
185 | #endif | ||
186 | |||
187 | // Save the callee-saved registers used below: rbp (zero), rbx (scratch), r12..r15 (window words) | ||
188 | |||
189 | push rbp | ||
190 | push rbx | ||
191 | push r12 | ||
192 | push r13 | ||
193 | push r14 | ||
194 | push r15 | ||
195 | |||
196 | // Copy y out of rdx into rcx, since rdx is the implicit multiplicand of every mulx below | ||
197 | |||
198 | mov y, rdx | ||
199 | |||
200 | // Zero rbp (the zero register); the xor also clears CF so the adc chain starts with no carry-in | ||
201 | |||
202 | xor zeroe, zeroe | ||
203 | |||
204 | // Do the zeroth row, which is a bit different | ||
205 | // Write back the zero-zero product low word to z[0] and then accumulate, | ||
206 | // leaving words 1..8 of y[0] * x in r9,r10,r11,r12,r13,r14,r15,r8 respectively | ||
207 | |||
208 | mov rdx, [y] | ||
209 | |||
210 | mulx r9, r8, [x] | ||
211 | mov [z], r8 | ||
212 | |||
213 | mulx r10, rbx, [x+8] | ||
214 | adc r9, rbx | ||
215 | |||
216 | mulx r11, rbx, [x+16] | ||
217 | adc r10, rbx | ||
218 | |||
219 | mulx r12, rbx, [x+24] | ||
220 | adc r11, rbx | ||
221 | |||
222 | mulx r13, rbx, [x+32] | ||
223 | adc r12, rbx | ||
224 | |||
225 | mulx r14, rbx, [x+40] | ||
226 | adc r13, rbx | ||
227 | |||
228 | mulx r15, rbx, [x+48] | ||
229 | adc r14, rbx | ||
230 | |||
231 | mulx r8, rbx, [x+56] | ||
232 | adc r15, rbx | ||
233 | adc r8, zero | ||
234 | |||
235 | // Now rows 1..7 in a uniform pattern; each addrow j also stores z[j] | ||
236 | |||
237 | addrow 1 | ||
238 | addrow 2 | ||
239 | addrow 3 | ||
240 | addrow 4 | ||
241 | addrow 5 | ||
242 | addrow 6 | ||
243 | addrow 7 | ||
244 | |||
245 | // Now write back the eight words still held in the window as z[8..15] | ||
246 | |||
247 | mov [z+64], r8 | ||
248 | mov [z+72], r9 | ||
249 | mov [z+80], r10 | ||
250 | mov [z+88], r11 | ||
251 | mov [z+96], r12 | ||
252 | mov [z+104], r13 | ||
253 | mov [z+112], r14 | ||
254 | mov [z+120], r15 | ||
255 | |||
256 | // Restore registers (reverse of the push order) and return | ||
257 | |||
258 | pop r15 | ||
259 | pop r14 | ||
260 | pop r13 | ||
261 | pop r12 | ||
262 | pop rbx | ||
263 | pop rbp | ||
264 | |||
265 | #if WINDOWS_ABI | ||
266 | pop rsi | ||
267 | pop rdi | ||
268 | #endif | ||
269 | ret | ||
270 | |||
271 | #if defined(__linux__) && defined(__ELF__) | ||
272 | .section .note.GNU-stack,"",%progbits | ||
273 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S index 066403b074..0e30b9170f 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: bignum_mul_8_16_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,8 +18,9 @@ | |||
16 | // Multiply z := x * y | 18 | // Multiply z := x * y |
17 | // Inputs x[8], y[8]; output z[16] | 19 | // Inputs x[8], y[8]; output z[16] |
18 | // | 20 | // |
19 | // extern void bignum_mul_8_16_alt | 21 | // extern void bignum_mul_8_16_alt(uint64_t z[static 16], |
20 | // (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); | 22 | // const uint64_t x[static 8], |
23 | // const uint64_t y[static 8]); | ||
21 | // | 24 | // |
22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | 25 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y |
23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | 26 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y |
@@ -72,7 +75,7 @@ | |||
72 | adc h, rdx | 75 | adc h, rdx |
73 | 76 | ||
74 | S2N_BN_SYMBOL(bignum_mul_8_16_alt): | 77 | S2N_BN_SYMBOL(bignum_mul_8_16_alt): |
75 | _CET_ENDBR | 78 | _CET_ENDBR |
76 | 79 | ||
77 | #if WINDOWS_ABI | 80 | #if WINDOWS_ABI |
78 | push rdi | 81 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S index 54e3f59442..86f1af2ac4 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: bignum_sqr.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,8 +18,7 @@ | |||
16 | // Square z := x^2 | 18 | // Square z := x^2 |
17 | // Input x[n]; output z[k] | 19 | // Input x[n]; output z[k] |
18 | // | 20 | // |
19 | // extern void bignum_sqr | 21 | // extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x); |
20 | // (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); | ||
21 | // | 22 | // |
22 | // Does the "z := x^2" operation where x is n digits and result z is k. | 23 | // Does the "z := x^2" operation where x is n digits and result z is k. |
23 | // Truncates the result in general unless k >= 2 * n | 24 | // Truncates the result in general unless k >= 2 * n |
@@ -62,7 +63,7 @@ | |||
62 | #define llshort ebp | 63 | #define llshort ebp |
63 | 64 | ||
64 | S2N_BN_SYMBOL(bignum_sqr): | 65 | S2N_BN_SYMBOL(bignum_sqr): |
65 | _CET_ENDBR | 66 | _CET_ENDBR |
66 | 67 | ||
67 | #if WINDOWS_ABI | 68 | #if WINDOWS_ABI |
68 | push rdi | 69 | push rdi |
@@ -86,7 +87,7 @@ S2N_BN_SYMBOL(bignum_sqr): | |||
86 | // If p = 0 the result is trivial and nothing needs doing | 87 | // If p = 0 the result is trivial and nothing needs doing |
87 | 88 | ||
88 | test p, p | 89 | test p, p |
89 | jz end | 90 | jz bignum_sqr_end |
90 | 91 | ||
91 | // initialize (hh,ll) = 0 | 92 | // initialize (hh,ll) = 0 |
92 | 93 | ||
@@ -97,7 +98,7 @@ S2N_BN_SYMBOL(bignum_sqr): | |||
97 | 98 | ||
98 | xor k, k | 99 | xor k, k |
99 | 100 | ||
100 | outerloop: | 101 | bignum_sqr_outerloop: |
101 | 102 | ||
102 | // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n | 103 | // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n |
103 | // We want to accumulate all x[i] * x[k - i] for bot <= i < top | 104 | // We want to accumulate all x[i] * x[k - i] for bot <= i < top |
@@ -122,7 +123,7 @@ outerloop: | |||
122 | // If htop <= bot then main doubled part of the sum is empty | 123 | // If htop <= bot then main doubled part of the sum is empty |
123 | 124 | ||
124 | cmp i, htop | 125 | cmp i, htop |
125 | jnc nosumming | 126 | jnc bignum_sqr_nosumming |
126 | 127 | ||
127 | // Use a moving pointer for [y] = x[k-i] for the cofactor | 128 | // Use a moving pointer for [y] = x[k-i] for the cofactor |
128 | 129 | ||
@@ -132,7 +133,7 @@ outerloop: | |||
132 | 133 | ||
133 | // Do the main part of the sum x[i] * x[k - i] for 2 * i < k | 134 | // Do the main part of the sum x[i] * x[k - i] for 2 * i < k |
134 | 135 | ||
135 | innerloop: | 136 | bignum_sqr_innerloop: |
136 | mov a, [x+8*i] | 137 | mov a, [x+8*i] |
137 | mul QWORD PTR [y] | 138 | mul QWORD PTR [y] |
138 | add l, a | 139 | add l, a |
@@ -141,7 +142,7 @@ innerloop: | |||
141 | sub y, 8 | 142 | sub y, 8 |
142 | inc i | 143 | inc i |
143 | cmp i, htop | 144 | cmp i, htop |
144 | jc innerloop | 145 | jc bignum_sqr_innerloop |
145 | 146 | ||
146 | // Now double it | 147 | // Now double it |
147 | 148 | ||
@@ -151,11 +152,11 @@ innerloop: | |||
151 | 152 | ||
152 | // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term | 153 | // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term |
153 | 154 | ||
154 | nosumming: | 155 | bignum_sqr_nosumming: |
155 | test k, 1 | 156 | test k, 1 |
156 | jnz innerend | 157 | jnz bignum_sqr_innerend |
157 | cmp i, n | 158 | cmp i, n |
158 | jnc innerend | 159 | jnc bignum_sqr_innerend |
159 | 160 | ||
160 | mov a, [x+8*i] | 161 | mov a, [x+8*i] |
161 | mul a | 162 | mul a |
@@ -165,7 +166,7 @@ nosumming: | |||
165 | 166 | ||
166 | // Now add the local sum into the global sum, store and shift | 167 | // Now add the local sum into the global sum, store and shift |
167 | 168 | ||
168 | innerend: | 169 | bignum_sqr_innerend: |
169 | add l, ll | 170 | add l, ll |
170 | mov [z+8*k], l | 171 | mov [z+8*k], l |
171 | adc h, hh | 172 | adc h, hh |
@@ -175,11 +176,11 @@ innerend: | |||
175 | 176 | ||
176 | inc k | 177 | inc k |
177 | cmp k, p | 178 | cmp k, p |
178 | jc outerloop | 179 | jc bignum_sqr_outerloop |
179 | 180 | ||
180 | // Restore registers and return | 181 | // Restore registers and return |
181 | 182 | ||
182 | end: | 183 | bignum_sqr_end: |
183 | pop r15 | 184 | pop r15 |
184 | pop r14 | 185 | pop r14 |
185 | pop r13 | 186 | pop r13 |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8.S new file mode 100644 index 0000000000..25664782f7 --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8.S | |||
@@ -0,0 +1,158 @@ | |||
1 | // $OpenBSD: bignum_sqr_4_8.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
2 | // | ||
3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
4 | // | ||
5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
6 | // purpose with or without fee is hereby granted, provided that the above | ||
7 | // copyright notice and this permission notice appear in all copies. | ||
8 | // | ||
9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | |||
17 | // ---------------------------------------------------------------------------- | ||
18 | // Square, z := x^2 | ||
19 | // Input x[4]; output z[8] | ||
20 | // | ||
21 | // extern void bignum_sqr_4_8(uint64_t z[static 8], const uint64_t x[static 4]); | ||
22 | // | ||
23 | // Standard x86-64 ABI: RDI = z, RSI = x | ||
24 | // Microsoft x64 ABI: RCX = z, RDX = x | ||
25 | // ---------------------------------------------------------------------------- | ||
26 | |||
27 | #include "s2n_bignum_internal.h" | ||
28 | |||
29 | .intel_syntax noprefix | ||
30 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_4_8) | ||
31 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_4_8) | ||
32 | .text | ||
33 | |||
34 | // z and x are already in these registers on entry (the WINDOWS_ABI prologue moves them there) | ||
35 | |||
36 | #define z rdi | ||
37 | #define x rsi | ||
38 | |||
39 | // A zero register | ||
40 | |||
41 | #define zero rbp | ||
42 | #define zeroe ebp | ||
43 | |||
44 | // Other registers | ||
45 | |||
46 | #define d1 r8 | ||
47 | #define d2 r9 | ||
48 | #define d3 r10 | ||
49 | #define d4 r11 | ||
50 | #define d5 r12 | ||
51 | #define d6 r13 | ||
52 | |||
53 | |||
54 | |||
55 | S2N_BN_SYMBOL(bignum_sqr_4_8): | ||
56 | _CET_ENDBR | ||
57 | |||
58 | #if WINDOWS_ABI | ||
59 | push rdi | ||
60 | push rsi | ||
61 | mov rdi, rcx | ||
62 | mov rsi, rdx | ||
63 | #endif | ||
64 | |||
65 | // Save the callee-saved registers used below: rbp (zero), r12 (d5), r13 (d6) | ||
66 | |||
67 | push rbp | ||
68 | push r12 | ||
69 | push r13 | ||
70 | |||
71 | // Set up an initial window [d6;...d1] = [23;03;01], where ij denotes the product x[i] * x[j] | ||
72 | |||
73 | mov rdx, [x] | ||
74 | mulx d2, d1, [x+8] | ||
75 | mulx d4, d3, [x+24] | ||
76 | mov rdx, [x+16] | ||
77 | mulx d6, d5, [x+24] | ||
78 | |||
79 | // Clear our zero register, and also initialize the flags for the carry chain | ||
80 | |||
81 | xor zeroe, zeroe | ||
82 | |||
83 | // Chain in the addition of 02 + 12 + 13 to that window (no carry-out possible) | ||
84 | // This gives all the "heterogeneous" terms of the squaring ready to double | ||
85 | |||
86 | mulx rcx, rax, [x] | ||
87 | adcx d2, rax | ||
88 | adox d3, rcx | ||
89 | mulx rcx, rax, [x+8] | ||
90 | adcx d3, rax | ||
91 | adox d4, rcx | ||
92 | mov rdx, [x+24] | ||
93 | mulx rcx, rax, [x+8] | ||
94 | adcx d4, rax | ||
95 | adox d5, rcx | ||
96 | adcx d5, zero | ||
97 | adox d6, zero | ||
98 | adcx d6, zero | ||
99 | |||
100 | // In principle this is otiose as CF and OF carries are absorbed at this point | ||
101 | // However it seems helpful for the OOO engine to be told it's a fresh start | ||
102 | |||
103 | xor zeroe, zeroe | ||
104 | |||
105 | // Double the window (adcx d,d on the CF chain) and add the squares 00 + 11 + 22 + 33 (adox on the OF chain) | ||
106 | // | ||
107 | // We could use shift-double but this seems tidier and in larger squarings | ||
108 | // it was actually more efficient. I haven't experimented with this small | ||
109 | // case to see how much that matters. Note: the writeback here is sprinkled | ||
110 | // into the sequence in such a way that things still work if z = x, i.e. if | ||
111 | // the output overwrites the input buffer and beyond. | ||
112 | |||
113 | mov rdx, [x] | ||
114 | mulx rdx, rax, rdx | ||
115 | mov [z], rax | ||
116 | adcx d1, d1 | ||
117 | adox d1, rdx | ||
118 | mov rdx, [x+8] | ||
119 | mov [z+8], d1 | ||
120 | mulx rdx, rax, rdx | ||
121 | adcx d2, d2 | ||
122 | adox d2, rax | ||
123 | adcx d3, d3 | ||
124 | adox d3, rdx | ||
125 | mov rdx, [x+16] | ||
126 | mov [z+16], d2 | ||
127 | mulx rdx, rax, rdx | ||
128 | adcx d4, d4 | ||
129 | adox d4, rax | ||
130 | adcx d5, d5 | ||
131 | adox d5, rdx | ||
132 | mov rdx, [x+24] | ||
133 | mov [z+24], d3 | ||
134 | mulx rdx, rax, rdx | ||
135 | mov [z+32], d4 | ||
136 | adcx d6, d6 | ||
137 | mov [z+40], d5 | ||
138 | adox d6, rax | ||
139 | mov [z+48], d6 | ||
140 | adcx rdx, zero | ||
141 | adox rdx, zero | ||
142 | mov [z+56], rdx | ||
143 | |||
144 | // Restore saved registers (reverse of the push order) and return | ||
145 | |||
146 | pop r13 | ||
147 | pop r12 | ||
148 | pop rbp | ||
149 | |||
150 | #if WINDOWS_ABI | ||
151 | pop rsi | ||
152 | pop rdi | ||
153 | #endif | ||
154 | ret | ||
155 | |||
156 | #if defined(__linux__) && defined(__ELF__) | ||
157 | .section .note.GNU-stack,"",%progbits | ||
158 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S index 7c534ae907..7eafac3284 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: bignum_sqr_4_8_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,8 +18,8 @@ | |||
16 | // Square, z := x^2 | 18 | // Square, z := x^2 |
17 | // Input x[4]; output z[8] | 19 | // Input x[4]; output z[8] |
18 | // | 20 | // |
19 | // extern void bignum_sqr_4_8_alt | 21 | // extern void bignum_sqr_4_8_alt(uint64_t z[static 8], |
20 | // (uint64_t z[static 8], uint64_t x[static 4]); | 22 | // const uint64_t x[static 4]); |
21 | // | 23 | // |
22 | // Standard x86-64 ABI: RDI = z, RSI = x | 24 | // Standard x86-64 ABI: RDI = z, RSI = x |
23 | // Microsoft x64 ABI: RCX = z, RDX = x | 25 | // Microsoft x64 ABI: RCX = z, RDX = x |
@@ -71,7 +73,7 @@ | |||
71 | adc c, 0 | 73 | adc c, 0 |
72 | 74 | ||
73 | S2N_BN_SYMBOL(bignum_sqr_4_8_alt): | 75 | S2N_BN_SYMBOL(bignum_sqr_4_8_alt): |
74 | _CET_ENDBR | 76 | _CET_ENDBR |
75 | 77 | ||
76 | #if WINDOWS_ABI | 78 | #if WINDOWS_ABI |
77 | push rdi | 79 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12.S new file mode 100644 index 0000000000..3f055e8b75 --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12.S | |||
@@ -0,0 +1,227 @@ | |||
1 | // $OpenBSD: bignum_sqr_6_12.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
2 | // | ||
3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
4 | // | ||
5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
6 | // purpose with or without fee is hereby granted, provided that the above | ||
7 | // copyright notice and this permission notice appear in all copies. | ||
8 | // | ||
9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | |||
17 | // ---------------------------------------------------------------------------- | ||
18 | // Square, z := x^2 | ||
19 | // Input x[6]; output z[12] | ||
20 | // | ||
21 | // extern void bignum_sqr_6_12(uint64_t z[static 12], const uint64_t x[static 6]); | ||
22 | // | ||
23 | // Standard x86-64 ABI: RDI = z, RSI = x | ||
24 | // Microsoft x64 ABI: RCX = z, RDX = x | ||
25 | // ---------------------------------------------------------------------------- | ||
26 | |||
27 | #include "s2n_bignum_internal.h" | ||
28 | |||
29 | .intel_syntax noprefix | ||
30 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_6_12) | ||
31 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_6_12) | ||
32 | .text | ||
33 | |||
34 | // Input arguments: z and x are already in their standard ABI registers | ||
35 | |||
36 | #define z rdi | ||
37 | #define x rsi | ||
38 | |||
39 | // A zero register | ||
40 | |||
41 | #define zero rbp | ||
42 | #define zeroe ebp | ||
43 | |||
44 | // Registers accumulating result words 1 to 9 | ||
45 | |||
46 | #define d1 r8 | ||
47 | #define d2 r9 | ||
48 | #define d3 r10 | ||
49 | #define d4 r11 | ||
50 | #define d5 r12 | ||
51 | #define d6 r13 | ||
52 | #define d7 r14 | ||
53 | #define d8 r15 | ||
54 | #define d9 rbx | ||
55 | |||
56 | // Care is needed: d10 re-uses the zero register, so zero is unusable once d10 is live | ||
57 | |||
58 | #define d10 rbp | ||
59 | |||
60 | |||
61 | S2N_BN_SYMBOL(bignum_sqr_6_12): | ||
62 | _CET_ENDBR | ||
63 | |||
64 | #if WINDOWS_ABI | ||
65 | push rdi | ||
66 | push rsi | ||
67 | mov rdi, rcx | ||
68 | mov rsi, rdx | ||
69 | #endif | ||
70 | |||
71 | // Save the registers (rbp, rbx, r12-r15) used as scratch below and restored before returning | ||
72 | |||
73 | push rbp | ||
74 | push rbx | ||
75 | push r12 | ||
76 | push r13 | ||
77 | push r14 | ||
78 | push r15 | ||
79 | |||
80 | // Set up an initial window [d8;...d1] = [34;05;03;01], where ij means x[i] * x[j] | ||
81 | |||
82 | mov rdx, [x] | ||
83 | mulx d2, d1, [x+8] | ||
84 | mulx d4, d3, [x+24] | ||
85 | mulx d6, d5, [x+40] | ||
86 | mov rdx, [x+24] | ||
87 | mulx d8, d7, [x+32] | ||
88 | |||
89 | // Clear our zero register, and also initialize the flags for the carry chain | ||
90 | |||
91 | xor zeroe, zeroe | ||
92 | |||
93 | // Chain in the addition of 02 + 12 + 13 + 14 + 15 to that window | ||
94 | // (no carry-out possible since we add it to the top of a product) | ||
95 | |||
96 | mov rdx, [x+16] | ||
97 | mulx rcx, rax, [x] | ||
98 | adcx d2, rax | ||
99 | adox d3, rcx | ||
100 | mulx rcx, rax, [x+8] | ||
101 | adcx d3, rax | ||
102 | adox d4, rcx | ||
103 | mov rdx, [x+8] | ||
104 | mulx rcx, rax, [x+24] | ||
105 | adcx d4, rax | ||
106 | adox d5, rcx | ||
107 | mulx rcx, rax, [x+32] | ||
108 | adcx d5, rax | ||
109 | adox d6, rcx | ||
110 | mulx rcx, rax, [x+40] | ||
111 | adcx d6, rax | ||
112 | adox d7, rcx | ||
113 | adcx d7, zero | ||
114 | adox d8, zero | ||
115 | adcx d8, zero | ||
116 | |||
117 | // Again zero out the flags. Actually they are already cleared but it may | ||
118 | // help decouple these in the OOO engine not to wait for the chain above | ||
119 | |||
120 | xor zeroe, zeroe | ||
121 | |||
122 | // Now chain in the 04 + 23 + 24 + 25 + 35 + 45 terms | ||
123 | // We are running out of registers: d10 aliases the zero register, so after d10 is loaded rax is zeroed and used instead | ||
124 | |||
125 | mov rdx, [x+32] | ||
126 | mulx rcx, rax, [x] | ||
127 | adcx d4, rax | ||
128 | adox d5, rcx | ||
129 | mov rdx, [x+16] | ||
130 | mulx rcx, rax, [x+24] | ||
131 | adcx d5, rax | ||
132 | adox d6, rcx | ||
133 | mulx rcx, rax, [x+32] | ||
134 | adcx d6, rax | ||
135 | adox d7, rcx | ||
136 | mulx rcx, rax, [x+40] | ||
137 | adcx d7, rax | ||
138 | adox d8, rcx | ||
139 | mov rdx, [x+24] | ||
140 | mulx d9, rax, [x+40] | ||
141 | adcx d8, rax | ||
142 | adox d9, zero | ||
143 | mov rdx, [x+32] | ||
144 | mulx d10, rax, [x+40] | ||
145 | adcx d9, rax | ||
146 | mov eax, 0 | ||
147 | adox d10, rax | ||
148 | adcx d10, rax | ||
149 | |||
150 | // Clear the carry and overflow flags for a fresh start of both chains (rax is overwritten below) | ||
151 | |||
152 | xor eax, eax | ||
153 | |||
154 | // Double and add to the 00 + 11 + 22 + 33 + 44 + 55 terms | ||
155 | // | ||
156 | // We could use shift-double but this seems tidier and in larger squarings | ||
157 | // it was actually more efficient. I haven't experimented with this small | ||
158 | // case to see how much that matters. Note: the writeback here is sprinkled | ||
159 | // into the sequence in such a way that things still work if z = x, i.e. if | ||
160 | // the output overwrites the input buffer and beyond. | ||
161 | |||
162 | mov rdx, [x] | ||
163 | mulx rdx, rax, rdx | ||
164 | mov [z], rax | ||
165 | adcx d1, d1 | ||
166 | adox d1, rdx | ||
167 | mov rdx, [x+8] | ||
168 | mov [z+8], d1 | ||
169 | mulx rdx, rax, rdx | ||
170 | adcx d2, d2 | ||
171 | adox d2, rax | ||
172 | adcx d3, d3 | ||
173 | adox d3, rdx | ||
174 | mov rdx, [x+16] | ||
175 | mov [z+16], d2 | ||
176 | mulx rdx, rax, rdx | ||
177 | adcx d4, d4 | ||
178 | adox d4, rax | ||
179 | adcx d5, d5 | ||
180 | adox d5, rdx | ||
181 | mov rdx, [x+24] | ||
182 | mov [z+24], d3 | ||
183 | mulx rdx, rax, rdx | ||
184 | adcx d6, d6 | ||
185 | adox d6, rax | ||
186 | adcx d7, d7 | ||
187 | adox d7, rdx | ||
188 | mov rdx, [x+32] | ||
189 | mov [z+32], d4 | ||
190 | mulx rdx, rax, rdx | ||
191 | adcx d8, d8 | ||
192 | adox d8, rax | ||
193 | adcx d9, d9 | ||
194 | adox d9, rdx | ||
195 | mov rdx, [x+40] | ||
196 | mov [z+40], d5 | ||
197 | mulx rdx, rax, rdx | ||
198 | mov [z+48], d6 | ||
199 | adcx d10, d10 | ||
200 | mov [z+56], d7 | ||
201 | adox d10, rax | ||
202 | mov [z+64], d8 | ||
203 | mov eax, 0 | ||
204 | mov [z+72], d9 | ||
205 | adcx rdx, rax | ||
206 | mov [z+80], d10 | ||
207 | adox rdx, rax | ||
208 | mov [z+88], rdx | ||
209 | |||
210 | // Restore saved registers (in reverse order of the pushes) and return | ||
211 | |||
212 | pop r15 | ||
213 | pop r14 | ||
214 | pop r13 | ||
215 | pop r12 | ||
216 | pop rbx | ||
217 | pop rbp | ||
218 | |||
219 | #if WINDOWS_ABI | ||
220 | pop rsi | ||
221 | pop rdi | ||
222 | #endif | ||
223 | ret | ||
224 | |||
225 | #if defined(__linux__) && defined(__ELF__) | ||
226 | .section .note.GNU-stack,"",%progbits | ||
227 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12_alt.S new file mode 100644 index 0000000000..eb43b0a15b --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12_alt.S | |||
@@ -0,0 +1,210 @@ | |||
1 | // $OpenBSD: bignum_sqr_6_12_alt.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
2 | // | ||
3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
4 | // | ||
5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
6 | // purpose with or without fee is hereby granted, provided that the above | ||
7 | // copyright notice and this permission notice appear in all copies. | ||
8 | // | ||
9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | |||
17 | // ---------------------------------------------------------------------------- | ||
18 | // Square, z := x^2 | ||
19 | // Input x[6]; output z[12] | ||
20 | // | ||
21 | // extern void bignum_sqr_6_12_alt(uint64_t z[static 12], | ||
22 | // const uint64_t x[static 6]); | ||
23 | // | ||
24 | // Standard x86-64 ABI: RDI = z, RSI = x | ||
25 | // Microsoft x64 ABI: RCX = z, RDX = x | ||
26 | // ---------------------------------------------------------------------------- | ||
27 | |||
28 | #include "s2n_bignum_internal.h" | ||
29 | |||
30 | .intel_syntax noprefix | ||
31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_6_12_alt) | ||
32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_6_12_alt) | ||
33 | .text | ||
34 | |||
35 | // Input arguments (already in these registers under the standard ABI) | ||
36 | |||
37 | #define z rdi | ||
38 | #define x rsi | ||
39 | |||
40 | // Other variables used as a rotating 3-word window to add terms to | ||
41 | |||
42 | #define t0 r8 | ||
43 | #define t1 r9 | ||
44 | #define t2 r10 | ||
45 | |||
46 | // Additional temporaries (a 2-word local window) used to share doublings between terms | ||
47 | |||
48 | #define u0 rcx | ||
49 | #define u1 r11 | ||
50 | |||
51 | // Macro for the key "multiply and add" step: (c,h,l) += numa * numb, clobbering rax and rdx | ||
52 | |||
53 | #define combadd(c,h,l,numa,numb) \ | ||
54 | mov rax, numa; \ | ||
55 | mul QWORD PTR numb; \ | ||
56 | add l, rax; \ | ||
57 | adc h, rdx; \ | ||
58 | adc c, 0 | ||
59 | |||
60 | // Set up initial window (c,h,l) = numa * numb, with c cleared to zero; clobbers rax and rdx | ||
61 | |||
62 | #define combaddz(c,h,l,numa,numb) \ | ||
63 | mov rax, numa; \ | ||
64 | mul QWORD PTR numb; \ | ||
65 | xor c, c; \ | ||
66 | mov l, rax; \ | ||
67 | mov h, rdx | ||
68 | |||
69 | // Doubling step (c,h,l) = 2 * (c,hh,ll) + (0,h,l); hh and ll are doubled in place | ||
70 | |||
71 | #define doubladd(c,h,l,hh,ll) \ | ||
72 | add ll, ll; \ | ||
73 | adc hh, hh; \ | ||
74 | adc c, c; \ | ||
75 | add l, ll; \ | ||
76 | adc h, hh; \ | ||
77 | adc c, 0 | ||
78 | |||
79 | // Square term incorporation (c,h,l) += numa^2, clobbering rax and rdx | ||
80 | |||
81 | #define combadd1(c,h,l,numa) \ | ||
82 | mov rax, numa; \ | ||
83 | mul rax; \ | ||
84 | add l, rax; \ | ||
85 | adc h, rdx; \ | ||
86 | adc c, 0 | ||
87 | |||
88 | // A short form of the square term step, (h,l) += numa^2, where we don't expect a top carry | ||
89 | |||
90 | #define combads(h,l,numa) \ | ||
91 | mov rax, numa; \ | ||
92 | mul rax; \ | ||
93 | add l, rax; \ | ||
94 | adc h, rdx | ||
95 | |||
96 | // A version doubling directly before adding, for single non-square terms: (c,h,l) += 2 * numa * numb | ||
97 | |||
98 | #define combadd2(c,h,l,numa,numb) \ | ||
99 | mov rax, numa; \ | ||
100 | mul QWORD PTR numb; \ | ||
101 | add rax, rax; \ | ||
102 | adc rdx, rdx; \ | ||
103 | adc c, 0; \ | ||
104 | add l, rax; \ | ||
105 | adc h, rdx; \ | ||
106 | adc c, 0 | ||
107 | |||
108 | S2N_BN_SYMBOL(bignum_sqr_6_12_alt): | ||
109 | _CET_ENDBR | ||
110 | |||
111 | #if WINDOWS_ABI | ||
112 | push rdi | ||
113 | push rsi | ||
114 | mov rdi, rcx | ||
115 | mov rsi, rdx | ||
116 | #endif | ||
117 | |||
118 | // Result term 0: low word of x[0]^2; the high word starts the window | ||
119 | |||
120 | mov rax, [x] | ||
121 | mul rax | ||
122 | |||
123 | mov [z], rax | ||
124 | mov t0, rdx | ||
125 | xor t1, t1 | ||
126 | |||
127 | // Result term 1: add 2 * x[0] * x[1] | ||
128 | |||
129 | xor t2, t2 | ||
130 | combadd2(t2,t1,t0,[x],[x+8]) | ||
131 | mov [z+8], t0 | ||
132 | |||
133 | // Result term 2: add x[1]^2 + 2 * x[0] * x[2] | ||
134 | |||
135 | xor t0, t0 | ||
136 | combadd1(t0,t2,t1,[x+8]) | ||
137 | combadd2(t0,t2,t1,[x],[x+16]) | ||
138 | mov [z+16], t1 | ||
139 | |||
140 | // Result term 3: add 2 * (x[0] * x[3] + x[1] * x[2]) | ||
141 | |||
142 | combaddz(t1,u1,u0,[x],[x+24]) | ||
143 | combadd(t1,u1,u0,[x+8],[x+16]) | ||
144 | doubladd(t1,t0,t2,u1,u0) | ||
145 | mov [z+24], t2 | ||
146 | |||
147 | // Result term 4: add 2 * (x[0] * x[4] + x[1] * x[3]) + x[2]^2 | ||
148 | |||
149 | combaddz(t2,u1,u0,[x],[x+32]) | ||
150 | combadd(t2,u1,u0,[x+8],[x+24]) | ||
151 | doubladd(t2,t1,t0,u1,u0) | ||
152 | combadd1(t2,t1,t0,[x+16]) | ||
153 | mov [z+32], t0 | ||
154 | |||
155 | // Result term 5: add 2 * (x[0] * x[5] + x[1] * x[4] + x[2] * x[3]) | ||
156 | |||
157 | combaddz(t0,u1,u0,[x],[x+40]) | ||
158 | combadd(t0,u1,u0,[x+8],[x+32]) | ||
159 | combadd(t0,u1,u0,[x+16],[x+24]) | ||
160 | doubladd(t0,t2,t1,u1,u0) | ||
161 | mov [z+40], t1 | ||
162 | |||
163 | // Result term 6: add 2 * (x[1] * x[5] + x[2] * x[4]) + x[3]^2 | ||
164 | |||
165 | combaddz(t1,u1,u0,[x+8],[x+40]) | ||
166 | combadd(t1,u1,u0,[x+16],[x+32]) | ||
167 | doubladd(t1,t0,t2,u1,u0) | ||
168 | combadd1(t1,t0,t2,[x+24]) | ||
169 | mov [z+48], t2 | ||
170 | |||
171 | // Result term 7: add 2 * (x[2] * x[5] + x[3] * x[4]) | ||
172 | |||
173 | combaddz(t2,u1,u0,[x+16],[x+40]) | ||
174 | combadd(t2,u1,u0,[x+24],[x+32]) | ||
175 | doubladd(t2,t1,t0,u1,u0) | ||
176 | mov [z+56], t0 | ||
177 | |||
178 | // Result term 8: add 2 * x[3] * x[5] + x[4]^2 | ||
179 | |||
180 | xor t0, t0 | ||
181 | combadd2(t0,t2,t1,[x+24],[x+40]) | ||
182 | combadd1(t0,t2,t1,[x+32]) | ||
183 | mov [z+64], t1 | ||
184 | |||
185 | // Result term 9: add 2 * x[4] * x[5] | ||
186 | |||
187 | xor t1, t1 | ||
188 | combadd2(t1,t0,t2,[x+32],[x+40]) | ||
189 | mov [z+72], t2 | ||
190 | |||
191 | // Result term 10: add x[5]^2 (short form, no top carry word) | ||
192 | |||
193 | combads(t1,t0,[x+40]) | ||
194 | mov [z+80], t0 | ||
195 | |||
196 | // Result term 11: the remaining top word of the window | ||
197 | |||
198 | mov [z+88], t1 | ||
199 | |||
200 | // Return | ||
201 | |||
202 | #if WINDOWS_ABI | ||
203 | pop rsi | ||
204 | pop rdi | ||
205 | #endif | ||
206 | ret | ||
207 | |||
208 | #if defined(__linux__) && defined(__ELF__) | ||
209 | .section .note.GNU-stack,"",%progbits | ||
210 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16.S new file mode 100644 index 0000000000..41277b5b6a --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16.S | |||
@@ -0,0 +1,311 @@ | |||
1 | // $OpenBSD: bignum_sqr_8_16.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
2 | // | ||
3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
4 | // | ||
5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
6 | // purpose with or without fee is hereby granted, provided that the above | ||
7 | // copyright notice and this permission notice appear in all copies. | ||
8 | // | ||
9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | |||
17 | // ---------------------------------------------------------------------------- | ||
18 | // Square, z := x^2 | ||
19 | // Input x[8]; output z[16] | ||
20 | // | ||
21 | // extern void bignum_sqr_8_16(uint64_t z[static 16], const uint64_t x[static 8]); | ||
22 | // | ||
23 | // Standard x86-64 ABI: RDI = z, RSI = x | ||
24 | // Microsoft x64 ABI: RCX = z, RDX = x | ||
25 | // ---------------------------------------------------------------------------- | ||
26 | |||
27 | #include "s2n_bignum_internal.h" | ||
28 | |||
29 | .intel_syntax noprefix | ||
30 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_8_16) | ||
31 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_8_16) | ||
32 | .text | ||
33 | |||
34 | // Input arguments: z and x are already in their standard ABI registers | ||
35 | |||
36 | #define z rdi | ||
37 | #define x rsi | ||
38 | |||
39 | // A zero register | ||
40 | |||
41 | #define zero rbp | ||
42 | #define zeroe ebp | ||
43 | |||
44 | // mulpadd i, j adds rdx * x[i] into the window at the i+j point: the register pair is picked by (i+j) mod 8 from r8..r15, the low word is added with adcx and the high word with adox; clobbers rax and rcx | ||
45 | |||
46 | .macro mulpadd arg1,arg2 | ||
47 | mulx rcx, rax, [x+8*\arg1] | ||
48 | .if ((\arg1 + \arg2) % 8 == 0) | ||
49 | adcx r8, rax | ||
50 | adox r9, rcx | ||
51 | .elseif ((\arg1 + \arg2) % 8 == 1) | ||
52 | adcx r9, rax | ||
53 | adox r10, rcx | ||
54 | .elseif ((\arg1 + \arg2) % 8 == 2) | ||
55 | adcx r10, rax | ||
56 | adox r11, rcx | ||
57 | .elseif ((\arg1 + \arg2) % 8 == 3) | ||
58 | adcx r11, rax | ||
59 | adox r12, rcx | ||
60 | .elseif ((\arg1 + \arg2) % 8 == 4) | ||
61 | adcx r12, rax | ||
62 | adox r13, rcx | ||
63 | .elseif ((\arg1 + \arg2) % 8 == 5) | ||
64 | adcx r13, rax | ||
65 | adox r14, rcx | ||
66 | .elseif ((\arg1 + \arg2) % 8 == 6) | ||
67 | adcx r14, rax | ||
68 | adox r15, rcx | ||
69 | .elseif ((\arg1 + \arg2) % 8 == 7) | ||
70 | adcx r15, rax | ||
71 | adox r8, rcx | ||
72 | .endif | ||
73 | |||
74 | .endm | ||
75 | |||
76 | // mulpade i, j adds rdx * x[i] into the window at i+j | ||
77 | // but writes the high product word straight into the next window register (overwriting it) and then adds only the adox carry to it via the zero register | ||
78 | |||
79 | .macro mulpade arg1,arg2 | ||
80 | .if ((\arg1 + \arg2) % 8 == 0) | ||
81 | mulx r9, rax, [x+8*\arg1] | ||
82 | adcx r8, rax | ||
83 | adox r9, zero | ||
84 | .elseif ((\arg1 + \arg2) % 8 == 1) | ||
85 | mulx r10, rax, [x+8*\arg1] | ||
86 | adcx r9, rax | ||
87 | adox r10, zero | ||
88 | .elseif ((\arg1 + \arg2) % 8 == 2) | ||
89 | mulx r11, rax, [x+8*\arg1] | ||
90 | adcx r10, rax | ||
91 | adox r11, zero | ||
92 | .elseif ((\arg1 + \arg2) % 8 == 3) | ||
93 | mulx r12, rax, [x+8*\arg1] | ||
94 | adcx r11, rax | ||
95 | adox r12, zero | ||
96 | .elseif ((\arg1 + \arg2) % 8 == 4) | ||
97 | mulx r13, rax, [x+8*\arg1] | ||
98 | adcx r12, rax | ||
99 | adox r13, zero | ||
100 | .elseif ((\arg1 + \arg2) % 8 == 5) | ||
101 | mulx r14, rax, [x+8*\arg1] | ||
102 | adcx r13, rax | ||
103 | adox r14, zero | ||
104 | .elseif ((\arg1 + \arg2) % 8 == 6) | ||
105 | mulx r15, rax, [x+8*\arg1] | ||
106 | adcx r14, rax | ||
107 | adox r15, zero | ||
108 | .elseif ((\arg1 + \arg2) % 8 == 7) | ||
109 | mulx r8, rax, [x+8*\arg1] | ||
110 | adcx r15, rax | ||
111 | adox r8, zero | ||
112 | .endif | ||
113 | |||
114 | .endm | ||
115 | |||
116 | .macro diagonals | ||
117 | |||
118 | xor zeroe, zeroe | ||
119 | |||
120 | // Set initial window = 10 + 20 + 30 + 40 + 50 + 60 + 70 (ij means x[i] * x[j]); its two lowest words are written back to z[1] and z[2] | ||
121 | |||
122 | mov rdx, [x] | ||
123 | mulx rax, r9, [x+8] | ||
124 | mov [z+8], r9 | ||
125 | mulx rcx, r10, [x+16] | ||
126 | adcx r10, rax | ||
127 | mov [z+16], r10 | ||
128 | mulx rax, r11, [x+24] | ||
129 | adcx r11, rcx | ||
130 | mulx rcx, r12, [x+32] | ||
131 | adcx r12, rax | ||
132 | mulx rax, r13, [x+40] | ||
133 | adcx r13, rcx | ||
134 | mulx rcx, r14, [x+48] | ||
135 | adcx r14, rax | ||
136 | mulx r8, r15, [x+56] | ||
137 | adcx r15, rcx | ||
138 | adcx r8, zero | ||
139 | |||
140 | // Add in the next diagonal = 21 + 31 + 41 + 51 + 61 + 71 + 54 | ||
141 | |||
142 | xor zeroe, zeroe | ||
143 | mov rdx, [x+8] | ||
144 | mulpadd 2, 1 | ||
145 | mov [z+24], r11 | ||
146 | mulpadd 3, 1 | ||
147 | mov [z+32], r12 | ||
148 | mulpadd 4, 1 | ||
149 | mulpadd 5, 1 | ||
150 | mulpadd 6, 1 | ||
151 | mulpade 7, 1 | ||
152 | mov rdx, [x+32] | ||
153 | mulpade 5, 4 | ||
154 | adcx r10, zero | ||
155 | |||
156 | // And the next one = 32 + 42 + 52 + 62 + 72 + 64 + 65 | ||
157 | |||
158 | xor zeroe, zeroe | ||
159 | mov rdx, [x+16] | ||
160 | mulpadd 3, 2 | ||
161 | mov [z+40], r13 | ||
162 | mulpadd 4, 2 | ||
163 | mov [z+48], r14 | ||
164 | mulpadd 5, 2 | ||
165 | mulpadd 6, 2 | ||
166 | mulpadd 7, 2 | ||
167 | mov rdx, [x+48] | ||
168 | mulpade 4, 6 | ||
169 | mulpade 5, 6 | ||
170 | adcx r12, zero | ||
171 | |||
172 | // And the final one = 43 + 53 + 63 + 73 + 74 + 75 + 76 | ||
173 | |||
174 | xor zeroe, zeroe | ||
175 | mov rdx, [x+24] | ||
176 | mulpadd 4, 3 | ||
177 | mov [z+56], r15 | ||
178 | mulpadd 5, 3 | ||
179 | mov [z+64], r8 | ||
180 | mulpadd 6, 3 | ||
181 | mulpadd 7, 3 | ||
182 | mov rdx, [x+56] | ||
183 | mulpadd 4, 7 | ||
184 | mulpade 5, 7 | ||
185 | mulpade 6, 7 | ||
186 | adcx r14, zero | ||
187 | |||
188 | // Double the off-diagonal sum and add the squares x[i]^2; use z[1]..z[8] and thereafter the registers | ||
189 | // r9..r15 which haven't been written back yet | ||
190 | |||
191 | xor zeroe, zeroe | ||
192 | mov rdx, [x] | ||
193 | mulx rcx, rax, rdx | ||
194 | mov [z], rax | ||
195 | mov rax, [z+8] | ||
196 | adcx rax, rax | ||
197 | adox rax, rcx | ||
198 | mov [z+8], rax | ||
199 | |||
200 | mov rax, [z+16] | ||
201 | mov rdx, [x+8] | ||
202 | mulx rcx, rdx, rdx | ||
203 | adcx rax, rax | ||
204 | adox rax, rdx | ||
205 | mov [z+16], rax | ||
206 | mov rax, [z+24] | ||
207 | adcx rax, rax | ||
208 | adox rax, rcx | ||
209 | mov [z+24], rax | ||
210 | |||
211 | mov rax, [z+32] | ||
212 | mov rdx, [x+16] | ||
213 | mulx rcx, rdx, rdx | ||
214 | adcx rax, rax | ||
215 | adox rax, rdx | ||
216 | mov [z+32], rax | ||
217 | mov rax, [z+40] | ||
218 | adcx rax, rax | ||
219 | adox rax, rcx | ||
220 | mov [z+40], rax | ||
221 | |||
222 | mov rax, [z+48] | ||
223 | mov rdx, [x+24] | ||
224 | mulx rcx, rdx, rdx | ||
225 | adcx rax, rax | ||
226 | adox rax, rdx | ||
227 | mov [z+48], rax | ||
228 | mov rax, [z+56] | ||
229 | adcx rax, rax | ||
230 | adox rax, rcx | ||
231 | mov [z+56], rax | ||
232 | |||
233 | mov rax, [z+64] | ||
234 | mov rdx, [x+32] | ||
235 | mulx rcx, rdx, rdx | ||
236 | adcx rax, rax | ||
237 | adox rax, rdx | ||
238 | mov [z+64], rax | ||
239 | adcx r9, r9 | ||
240 | adox r9, rcx | ||
241 | mov [z+72], r9 | ||
242 | |||
243 | mov rdx, [x+40] | ||
244 | mulx rcx, rdx, rdx | ||
245 | adcx r10, r10 | ||
246 | adox r10, rdx | ||
247 | mov [z+80], r10 | ||
248 | adcx r11, r11 | ||
249 | adox r11, rcx | ||
250 | mov [z+88], r11 | ||
251 | |||
252 | mov rdx, [x+48] | ||
253 | mulx rcx, rdx, rdx | ||
254 | adcx r12, r12 | ||
255 | adox r12, rdx | ||
256 | mov [z+96], r12 | ||
257 | adcx r13, r13 | ||
258 | adox r13, rcx | ||
259 | mov [z+104], r13 | ||
260 | |||
261 | mov rdx, [x+56] | ||
262 | mulx r15, rdx, rdx | ||
263 | adcx r14, r14 | ||
264 | adox r14, rdx | ||
265 | mov [z+112], r14 | ||
266 | adcx r15, zero | ||
267 | adox r15, zero | ||
268 | mov [z+120], r15 | ||
269 | |||
270 | .endm | ||
271 | |||
272 | |||
273 | S2N_BN_SYMBOL(bignum_sqr_8_16): | ||
274 | _CET_ENDBR | ||
275 | |||
276 | #if WINDOWS_ABI | ||
277 | push rdi | ||
278 | push rsi | ||
279 | mov rdi, rcx | ||
280 | mov rsi, rdx | ||
281 | #endif | ||
282 | |||
283 | // Save the registers (rbp, r12-r15) that the squaring code overwrites; they are restored before returning | ||
284 | |||
285 | push rbp | ||
286 | push r12 | ||
287 | push r13 | ||
288 | push r14 | ||
289 | push r15 | ||
290 | |||
291 | // Do the squaring by expanding the diagonals macro | ||
292 | |||
293 | diagonals | ||
294 | |||
295 | // Epilog: restore the saved registers in reverse order of the pushes | ||
296 | |||
297 | pop r15 | ||
298 | pop r14 | ||
299 | pop r13 | ||
300 | pop r12 | ||
301 | pop rbp | ||
302 | |||
303 | #if WINDOWS_ABI | ||
304 | pop rsi | ||
305 | pop rdi | ||
306 | #endif | ||
307 | ret | ||
308 | |||
309 | #if defined(__linux__) && defined(__ELF__) | ||
310 | .section .note.GNU-stack,"",%progbits | ||
311 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S index ac0b6f96c2..cb10ba2a12 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: bignum_sqr_8_16_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,7 +18,8 @@ | |||
16 | // Square, z := x^2 | 18 | // Square, z := x^2 |
17 | // Input x[8]; output z[16] | 19 | // Input x[8]; output z[16] |
18 | // | 20 | // |
19 | // extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); | 21 | // extern void bignum_sqr_8_16_alt(uint64_t z[static 16], |
22 | // const uint64_t x[static 8]); | ||
20 | // | 23 | // |
21 | // Standard x86-64 ABI: RDI = z, RSI = x | 24 | // Standard x86-64 ABI: RDI = z, RSI = x |
22 | // Microsoft x64 ABI: RCX = z, RDX = x | 25 | // Microsoft x64 ABI: RCX = z, RDX = x |
@@ -103,7 +106,7 @@ | |||
103 | adc c, 0 | 106 | adc c, 0 |
104 | 107 | ||
105 | S2N_BN_SYMBOL(bignum_sqr_8_16_alt): | 108 | S2N_BN_SYMBOL(bignum_sqr_8_16_alt): |
106 | _CET_ENDBR | 109 | _CET_ENDBR |
107 | 110 | ||
108 | #if WINDOWS_ABI | 111 | #if WINDOWS_ABI |
109 | push rdi | 112 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S index 3ff8a30510..7324d3a71e 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: bignum_sub.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,9 +18,8 @@ | |||
16 | // Subtract, z := x - y | 18 | // Subtract, z := x - y |
17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 19 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
18 | // | 20 | // |
19 | // extern uint64_t bignum_sub | 21 | // extern uint64_t bignum_sub(uint64_t p, uint64_t *z, uint64_t m, |
20 | // (uint64_t p, uint64_t *z, | 22 | // const uint64_t *x, uint64_t n, const uint64_t *y); |
21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
22 | // | 23 | // |
23 | // Does the z := x - y operation, truncating modulo p words in general and | 24 | // Does the z := x - y operation, truncating modulo p words in general and |
24 | // returning a top borrow (0 or 1) in the p'th place, only subtracting input | 25 | // returning a top borrow (0 or 1) in the p'th place, only subtracting input |
@@ -49,7 +50,7 @@ | |||
49 | 50 | ||
50 | 51 | ||
51 | S2N_BN_SYMBOL(bignum_sub): | 52 | S2N_BN_SYMBOL(bignum_sub): |
52 | _CET_ENDBR | 53 | _CET_ENDBR |
53 | 54 | ||
54 | #if WINDOWS_ABI | 55 | #if WINDOWS_ABI |
55 | push rdi | 56 | push rdi |
@@ -75,7 +76,7 @@ S2N_BN_SYMBOL(bignum_sub): | |||
75 | cmp p, n | 76 | cmp p, n |
76 | cmovc n, p | 77 | cmovc n, p |
77 | cmp m, n | 78 | cmp m, n |
78 | jc ylonger | 79 | jc bignum_sub_ylonger |
79 | 80 | ||
80 | // The case where x is longer or of the same size (p >= m >= n) | 81 | // The case where x is longer or of the same size (p >= m >= n) |
81 | 82 | ||
@@ -83,32 +84,32 @@ S2N_BN_SYMBOL(bignum_sub): | |||
83 | sub m, n | 84 | sub m, n |
84 | inc m | 85 | inc m |
85 | test n, n | 86 | test n, n |
86 | jz xtest | 87 | jz bignum_sub_xtest |
87 | xmainloop: | 88 | bignum_sub_xmainloop: |
88 | mov a, [x+8*i] | 89 | mov a, [x+8*i] |
89 | sbb a, [y+8*i] | 90 | sbb a, [y+8*i] |
90 | mov [z+8*i],a | 91 | mov [z+8*i],a |
91 | inc i | 92 | inc i |
92 | dec n | 93 | dec n |
93 | jnz xmainloop | 94 | jnz bignum_sub_xmainloop |
94 | jmp xtest | 95 | jmp bignum_sub_xtest |
95 | xtoploop: | 96 | bignum_sub_xtoploop: |
96 | mov a, [x+8*i] | 97 | mov a, [x+8*i] |
97 | sbb a, 0 | 98 | sbb a, 0 |
98 | mov [z+8*i],a | 99 | mov [z+8*i],a |
99 | inc i | 100 | inc i |
100 | xtest: | 101 | bignum_sub_xtest: |
101 | dec m | 102 | dec m |
102 | jnz xtoploop | 103 | jnz bignum_sub_xtoploop |
103 | sbb a, a | 104 | sbb a, a |
104 | test p, p | 105 | test p, p |
105 | jz tailskip | 106 | jz bignum_sub_tailskip |
106 | tailloop: | 107 | bignum_sub_tailloop: |
107 | mov [z+8*i],a | 108 | mov [z+8*i],a |
108 | inc i | 109 | inc i |
109 | dec p | 110 | dec p |
110 | jnz tailloop | 111 | jnz bignum_sub_tailloop |
111 | tailskip: | 112 | bignum_sub_tailskip: |
112 | neg a | 113 | neg a |
113 | #if WINDOWS_ABI | 114 | #if WINDOWS_ABI |
114 | pop rsi | 115 | pop rsi |
@@ -118,29 +119,29 @@ tailskip: | |||
118 | 119 | ||
119 | // The case where y is longer (p >= n > m) | 120 | // The case where y is longer (p >= n > m) |
120 | 121 | ||
121 | ylonger: | 122 | bignum_sub_ylonger: |
122 | 123 | ||
123 | sub p, n | 124 | sub p, n |
124 | sub n, m | 125 | sub n, m |
125 | test m, m | 126 | test m, m |
126 | jz ytoploop | 127 | jz bignum_sub_ytoploop |
127 | ymainloop: | 128 | bignum_sub_ymainloop: |
128 | mov a, [x+8*i] | 129 | mov a, [x+8*i] |
129 | sbb a, [y+8*i] | 130 | sbb a, [y+8*i] |
130 | mov [z+8*i],a | 131 | mov [z+8*i],a |
131 | inc i | 132 | inc i |
132 | dec m | 133 | dec m |
133 | jnz ymainloop | 134 | jnz bignum_sub_ymainloop |
134 | ytoploop: | 135 | bignum_sub_ytoploop: |
135 | mov ashort, 0 | 136 | mov ashort, 0 |
136 | sbb a, [y+8*i] | 137 | sbb a, [y+8*i] |
137 | mov [z+8*i],a | 138 | mov [z+8*i],a |
138 | inc i | 139 | inc i |
139 | dec n | 140 | dec n |
140 | jnz ytoploop | 141 | jnz bignum_sub_ytoploop |
141 | sbb a, a | 142 | sbb a, a |
142 | test p, p | 143 | test p, p |
143 | jnz tailloop | 144 | jnz bignum_sub_tailloop |
144 | neg a | 145 | neg a |
145 | #if WINDOWS_ABI | 146 | #if WINDOWS_ABI |
146 | pop rsi | 147 | pop rsi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c b/src/lib/libcrypto/bn/arch/amd64/bn_arch.c index a377a05681..9ff8920ca2 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c +++ b/src/lib/libcrypto/bn/arch/amd64/bn_arch.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_arch.c,v 1.7 2023/06/24 16:01:44 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.c,v 1.12 2025/08/14 15:29:17 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -19,6 +19,7 @@ | |||
19 | 19 | ||
20 | #include "bn_arch.h" | 20 | #include "bn_arch.h" |
21 | #include "bn_local.h" | 21 | #include "bn_local.h" |
22 | #include "crypto_arch.h" | ||
22 | #include "s2n_bignum.h" | 23 | #include "s2n_bignum.h" |
23 | 24 | ||
24 | #ifdef HAVE_BN_ADD | 25 | #ifdef HAVE_BN_ADD |
@@ -26,8 +27,8 @@ BN_ULONG | |||
26 | bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | 27 | bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, |
27 | int b_len) | 28 | int b_len) |
28 | { | 29 | { |
29 | return bignum_add(r_len, (uint64_t *)r, a_len, (uint64_t *)a, | 30 | return bignum_add(r_len, (uint64_t *)r, a_len, (const uint64_t *)a, |
30 | b_len, (uint64_t *)b); | 31 | b_len, (const uint64_t *)b); |
31 | } | 32 | } |
32 | #endif | 33 | #endif |
33 | 34 | ||
@@ -36,8 +37,8 @@ bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | |||
36 | BN_ULONG | 37 | BN_ULONG |
37 | bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) | 38 | bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) |
38 | { | 39 | { |
39 | return bignum_add(n, (uint64_t *)rd, n, (uint64_t *)ad, n, | 40 | return bignum_add(n, (uint64_t *)rd, n, (const uint64_t *)ad, n, |
40 | (uint64_t *)bd); | 41 | (const uint64_t *)bd); |
41 | } | 42 | } |
42 | #endif | 43 | #endif |
43 | 44 | ||
@@ -46,8 +47,8 @@ BN_ULONG | |||
46 | bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | 47 | bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, |
47 | int b_len) | 48 | int b_len) |
48 | { | 49 | { |
49 | return bignum_sub(r_len, (uint64_t *)r, a_len, (uint64_t *)a, | 50 | return bignum_sub(r_len, (uint64_t *)r, a_len, (const uint64_t *)a, |
50 | b_len, (uint64_t *)b); | 51 | b_len, (const uint64_t *)b); |
51 | } | 52 | } |
52 | #endif | 53 | #endif |
53 | 54 | ||
@@ -55,8 +56,28 @@ bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | |||
55 | BN_ULONG | 56 | BN_ULONG |
56 | bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) | 57 | bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) |
57 | { | 58 | { |
58 | return bignum_sub(n, (uint64_t *)rd, n, (uint64_t *)ad, n, | 59 | return bignum_sub(n, (uint64_t *)rd, n, (const uint64_t *)ad, n, |
59 | (uint64_t *)bd); | 60 | (const uint64_t *)bd); |
61 | } | ||
62 | #endif | ||
63 | |||
64 | #ifdef HAVE_BN_MOD_ADD_WORDS | ||
65 | void | ||
66 | bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
67 | const BN_ULONG *m, size_t n) | ||
68 | { | ||
69 | bignum_modadd(n, (uint64_t *)r, (const uint64_t *)a, | ||
70 | (const uint64_t *)b, (const uint64_t *)m); | ||
71 | } | ||
72 | #endif | ||
73 | |||
74 | #ifdef HAVE_BN_MOD_SUB_WORDS | ||
75 | void | ||
76 | bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
77 | const BN_ULONG *m, size_t n) | ||
78 | { | ||
79 | bignum_modsub(n, (uint64_t *)r, (const uint64_t *)a, | ||
80 | (const uint64_t *)b, (const uint64_t *)m); | ||
60 | } | 81 | } |
61 | #endif | 82 | #endif |
62 | 83 | ||
@@ -64,7 +85,7 @@ bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) | |||
64 | BN_ULONG | 85 | BN_ULONG |
65 | bn_mul_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) | 86 | bn_mul_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) |
66 | { | 87 | { |
67 | return bignum_cmadd(num, (uint64_t *)rd, w, num, (uint64_t *)ad); | 88 | return bignum_cmadd(num, (uint64_t *)rd, w, num, (const uint64_t *)ad); |
68 | } | 89 | } |
69 | #endif | 90 | #endif |
70 | 91 | ||
@@ -72,25 +93,52 @@ bn_mul_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) | |||
72 | BN_ULONG | 93 | BN_ULONG |
73 | bn_mul_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) | 94 | bn_mul_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) |
74 | { | 95 | { |
75 | return bignum_cmul(num, (uint64_t *)rd, w, num, (uint64_t *)ad); | 96 | return bignum_cmul(num, (uint64_t *)rd, w, num, (const uint64_t *)ad); |
76 | } | 97 | } |
77 | #endif | 98 | #endif |
78 | 99 | ||
79 | #ifdef HAVE_BN_MUL_COMBA4 | 100 | #ifdef HAVE_BN_MUL_COMBA4 |
80 | void | 101 | void |
81 | bn_mul_comba4(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd) | 102 | bn_mul_comba4(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd) |
82 | { | 103 | { |
83 | /* XXX - consider using non-alt on CPUs that have the ADX extension. */ | 104 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { |
84 | bignum_mul_4_8_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd); | 105 | bignum_mul_4_8((uint64_t *)rd, (const uint64_t *)ad, |
106 | (const uint64_t *)bd); | ||
107 | return; | ||
108 | } | ||
109 | |||
110 | bignum_mul_4_8_alt((uint64_t *)rd, (const uint64_t *)ad, | ||
111 | (const uint64_t *)bd); | ||
112 | } | ||
113 | #endif | ||
114 | |||
115 | #ifdef HAVE_BN_MUL_COMBA6 | ||
116 | void | ||
117 | bn_mul_comba6(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd) | ||
118 | { | ||
119 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { | ||
120 | bignum_mul_6_12((uint64_t *)rd, (const uint64_t *)ad, | ||
121 | (const uint64_t *)bd); | ||
122 | return; | ||
123 | } | ||
124 | |||
125 | bignum_mul_6_12_alt((uint64_t *)rd, (const uint64_t *)ad, | ||
126 | (const uint64_t *)bd); | ||
85 | } | 127 | } |
86 | #endif | 128 | #endif |
87 | 129 | ||
88 | #ifdef HAVE_BN_MUL_COMBA8 | 130 | #ifdef HAVE_BN_MUL_COMBA8 |
89 | void | 131 | void |
90 | bn_mul_comba8(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd) | 132 | bn_mul_comba8(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd) |
91 | { | 133 | { |
92 | /* XXX - consider using non-alt on CPUs that have the ADX extension. */ | 134 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { |
93 | bignum_mul_8_16_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd); | 135 | bignum_mul_8_16((uint64_t *)rd, (const uint64_t *)ad, |
136 | (const uint64_t *)bd); | ||
137 | return; | ||
138 | } | ||
139 | |||
140 | bignum_mul_8_16_alt((uint64_t *)rd, (const uint64_t *)ad, | ||
141 | (const uint64_t *)bd); | ||
94 | } | 142 | } |
95 | #endif | 143 | #endif |
96 | 144 | ||
@@ -98,7 +146,7 @@ bn_mul_comba8(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd) | |||
98 | int | 146 | int |
99 | bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) | 147 | bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) |
100 | { | 148 | { |
101 | bignum_sqr(r_len, (uint64_t *)r->d, a->top, (uint64_t *)a->d); | 149 | bignum_sqr(r_len, (uint64_t *)r->d, a->top, (const uint64_t *)a->d); |
102 | 150 | ||
103 | return 1; | 151 | return 1; |
104 | } | 152 | } |
@@ -108,8 +156,25 @@ bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) | |||
108 | void | 156 | void |
109 | bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad) | 157 | bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad) |
110 | { | 158 | { |
111 | /* XXX - consider using non-alt on CPUs that have the ADX extension. */ | 159 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { |
112 | bignum_sqr_4_8_alt((uint64_t *)rd, (uint64_t *)ad); | 160 | bignum_sqr_4_8((uint64_t *)rd, (const uint64_t *)ad); |
161 | return; | ||
162 | } | ||
163 | |||
164 | bignum_sqr_4_8_alt((uint64_t *)rd, (const uint64_t *)ad); | ||
165 | } | ||
166 | #endif | ||
167 | |||
168 | #ifdef HAVE_BN_SQR_COMBA6 | ||
169 | void | ||
170 | bn_sqr_comba6(BN_ULONG *rd, const BN_ULONG *ad) | ||
171 | { | ||
172 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { | ||
173 | bignum_sqr_6_12((uint64_t *)rd, (const uint64_t *)ad); | ||
174 | return; | ||
175 | } | ||
176 | |||
177 | bignum_sqr_6_12_alt((uint64_t *)rd, (const uint64_t *)ad); | ||
113 | } | 178 | } |
114 | #endif | 179 | #endif |
115 | 180 | ||
@@ -117,8 +182,12 @@ bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad) | |||
117 | void | 182 | void |
118 | bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad) | 183 | bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad) |
119 | { | 184 | { |
120 | /* XXX - consider using non-alt on CPUs that have the ADX extension. */ | 185 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { |
121 | bignum_sqr_8_16_alt((uint64_t *)rd, (uint64_t *)ad); | 186 | bignum_sqr_8_16((uint64_t *)rd, (const uint64_t *)ad); |
187 | return; | ||
188 | } | ||
189 | |||
190 | bignum_sqr_8_16_alt((uint64_t *)rd, (const uint64_t *)ad); | ||
122 | } | 191 | } |
123 | #endif | 192 | #endif |
124 | 193 | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h index 927cd75208..7359f993a7 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_arch.h,v 1.14 2024/03/26 06:09:25 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.16 2025/08/14 15:22:54 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -27,13 +27,18 @@ | |||
27 | 27 | ||
28 | #define HAVE_BN_DIV_WORDS | 28 | #define HAVE_BN_DIV_WORDS |
29 | 29 | ||
30 | #define HAVE_BN_MOD_ADD_WORDS | ||
31 | #define HAVE_BN_MOD_SUB_WORDS | ||
32 | |||
30 | #define HAVE_BN_MUL_ADD_WORDS | 33 | #define HAVE_BN_MUL_ADD_WORDS |
31 | #define HAVE_BN_MUL_COMBA4 | 34 | #define HAVE_BN_MUL_COMBA4 |
35 | #define HAVE_BN_MUL_COMBA6 | ||
32 | #define HAVE_BN_MUL_COMBA8 | 36 | #define HAVE_BN_MUL_COMBA8 |
33 | #define HAVE_BN_MUL_WORDS | 37 | #define HAVE_BN_MUL_WORDS |
34 | 38 | ||
35 | #define HAVE_BN_SQR | 39 | #define HAVE_BN_SQR |
36 | #define HAVE_BN_SQR_COMBA4 | 40 | #define HAVE_BN_SQR_COMBA4 |
41 | #define HAVE_BN_SQR_COMBA6 | ||
37 | #define HAVE_BN_SQR_COMBA8 | 42 | #define HAVE_BN_SQR_COMBA8 |
38 | 43 | ||
39 | #define HAVE_BN_SUB | 44 | #define HAVE_BN_SUB |
diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S index 3926fcd4b0..705fbdbbda 100644 --- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S +++ b/src/lib/libcrypto/bn/arch/amd64/word_clz.S | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: word_clz.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -16,7 +18,7 @@ | |||
16 | // Count leading zero bits in a single word | 18 | // Count leading zero bits in a single word |
17 | // Input a; output function return | 19 | // Input a; output function return |
18 | // | 20 | // |
19 | // extern uint64_t word_clz (uint64_t a); | 21 | // extern uint64_t word_clz(uint64_t a); |
20 | // | 22 | // |
21 | // Standard x86-64 ABI: RDI = a, returns RAX | 23 | // Standard x86-64 ABI: RDI = a, returns RAX |
22 | // Microsoft x64 ABI: RCX = a, returns RAX | 24 | // Microsoft x64 ABI: RCX = a, returns RAX |
@@ -30,7 +32,7 @@ | |||
30 | .text | 32 | .text |
31 | 33 | ||
32 | S2N_BN_SYMBOL(word_clz): | 34 | S2N_BN_SYMBOL(word_clz): |
33 | _CET_ENDBR | 35 | _CET_ENDBR |
34 | 36 | ||
35 | #if WINDOWS_ABI | 37 | #if WINDOWS_ABI |
36 | push rdi | 38 | push rdi |
diff --git a/src/lib/libcrypto/bn/bn_internal.h b/src/lib/libcrypto/bn/bn_internal.h index a1f1515b57..8b5145e225 100644 --- a/src/lib/libcrypto/bn/bn_internal.h +++ b/src/lib/libcrypto/bn/bn_internal.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_internal.h,v 1.19 2025/05/25 05:12:05 jsing Exp $ */ | 1 | /* $OpenBSD: bn_internal.h,v 1.20 2025/08/02 16:20:00 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -41,6 +41,8 @@ void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | |||
41 | const BN_ULONG *m, size_t n); | 41 | const BN_ULONG *m, size_t n); |
42 | void bn_mod_mul_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | 42 | void bn_mod_mul_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
43 | const BN_ULONG *m, BN_ULONG *t, BN_ULONG m0, size_t n); | 43 | const BN_ULONG *m, BN_ULONG *t, BN_ULONG m0, size_t n); |
44 | void bn_mod_sqr_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *m, | ||
45 | BN_ULONG *t, BN_ULONG m0, size_t n); | ||
44 | 46 | ||
45 | void bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, | 47 | void bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, |
46 | const BN_ULONG *bp, const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, | 48 | const BN_ULONG *bp, const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, |
diff --git a/src/lib/libcrypto/bn/bn_local.h b/src/lib/libcrypto/bn/bn_local.h index 0eb9a7a745..1bd4c16baf 100644 --- a/src/lib/libcrypto/bn/bn_local.h +++ b/src/lib/libcrypto/bn/bn_local.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_local.h,v 1.51 2025/05/25 04:30:55 jsing Exp $ */ | 1 | /* $OpenBSD: bn_local.h,v 1.54 2025/08/05 15:08:13 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -240,10 +240,12 @@ BN_ULONG bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, | |||
240 | const BN_ULONG *b, int b_len); | 240 | const BN_ULONG *b, int b_len); |
241 | 241 | ||
242 | void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb); | 242 | void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb); |
243 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); | 243 | void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b); |
244 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); | 244 | void bn_mul_comba6(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b); |
245 | void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b); | ||
245 | 246 | ||
246 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a); | 247 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a); |
248 | void bn_sqr_comba6(BN_ULONG *r, const BN_ULONG *a); | ||
247 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a); | 249 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a); |
248 | 250 | ||
249 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | 251 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
diff --git a/src/lib/libcrypto/bn/bn_mod_words.c b/src/lib/libcrypto/bn/bn_mod_words.c index 8971f9f306..d9aee8701a 100644 --- a/src/lib/libcrypto/bn/bn_mod_words.c +++ b/src/lib/libcrypto/bn/bn_mod_words.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_mod_words.c,v 1.1 2025/05/25 04:58:32 jsing Exp $ */ | 1 | /* $OpenBSD: bn_mod_words.c,v 1.3 2025/08/05 15:15:54 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -73,6 +73,42 @@ void | |||
73 | bn_mod_mul_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | 73 | bn_mod_mul_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
74 | const BN_ULONG *m, BN_ULONG *t, BN_ULONG m0, size_t n) | 74 | const BN_ULONG *m, BN_ULONG *t, BN_ULONG m0, size_t n) |
75 | { | 75 | { |
76 | bn_montgomery_multiply_words(r, a, b, m, t, m0, n); | 76 | if (n == 4) { |
77 | bn_mul_comba4(t, a, b); | ||
78 | bn_montgomery_reduce_words(r, t, m, m0, n); | ||
79 | } else if (n == 6) { | ||
80 | bn_mul_comba6(t, a, b); | ||
81 | bn_montgomery_reduce_words(r, t, m, m0, n); | ||
82 | } else if (n == 8) { | ||
83 | bn_mul_comba8(t, a, b); | ||
84 | bn_montgomery_reduce_words(r, t, m, m0, n); | ||
85 | } else { | ||
86 | bn_montgomery_multiply_words(r, a, b, m, t, m0, n); | ||
87 | } | ||
88 | } | ||
89 | #endif | ||
90 | |||
91 | /* | ||
92 | * bn_mod_sqr_words() computes r[] = (a[] * a[]) mod m[], where a, r and | ||
93 | * m are arrays of words with length n (r may be the same as a) in the | ||
94 | * Montgomery domain. The result remains in the Montgomery domain. | ||
95 | */ | ||
96 | #ifndef HAVE_BN_MOD_SQR_WORDS | ||
97 | void | ||
98 | bn_mod_sqr_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *m, | ||
99 | BN_ULONG *t, BN_ULONG m0, size_t n) | ||
100 | { | ||
101 | if (n == 4) { | ||
102 | bn_sqr_comba4(t, a); | ||
103 | bn_montgomery_reduce_words(r, t, m, m0, n); | ||
104 | } else if (n == 6) { | ||
105 | bn_sqr_comba6(t, a); | ||
106 | bn_montgomery_reduce_words(r, t, m, m0, n); | ||
107 | } else if (n == 8) { | ||
108 | bn_sqr_comba8(t, a); | ||
109 | bn_montgomery_reduce_words(r, t, m, m0, n); | ||
110 | } else { | ||
111 | bn_montgomery_multiply_words(r, a, a, m, t, m0, n); | ||
112 | } | ||
77 | } | 113 | } |
78 | #endif | 114 | #endif |
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c index 950846fa5b..8280a8db27 100644 --- a/src/lib/libcrypto/bn/bn_mont.c +++ b/src/lib/libcrypto/bn/bn_mont.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_mont.c,v 1.68 2025/05/25 05:12:05 jsing Exp $ */ | 1 | /* $OpenBSD: bn_mont.c,v 1.69 2025/08/03 10:33:46 tb Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -116,6 +116,7 @@ | |||
116 | * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf | 116 | * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf |
117 | */ | 117 | */ |
118 | 118 | ||
119 | #include <limits.h> | ||
119 | #include <stdio.h> | 120 | #include <stdio.h> |
120 | #include <stdint.h> | 121 | #include <stdint.h> |
121 | #include <string.h> | 122 | #include <string.h> |
@@ -214,7 +215,7 @@ BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) | |||
214 | goto err; | 215 | goto err; |
215 | mont->N.neg = 0; | 216 | mont->N.neg = 0; |
216 | mont->ri = ((BN_num_bits(mod) + BN_BITS2 - 1) / BN_BITS2) * BN_BITS2; | 217 | mont->ri = ((BN_num_bits(mod) + BN_BITS2 - 1) / BN_BITS2) * BN_BITS2; |
217 | if (mont->ri * 2 < mont->ri) | 218 | if (mont->ri > INT_MAX / 2) |
218 | goto err; | 219 | goto err; |
219 | 220 | ||
220 | /* | 221 | /* |
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index bdeb9b0fe8..a30d05fb02 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_mul.c,v 1.39 2023/07/08 12:21:58 beck Exp $ */ | 1 | /* $OpenBSD: bn_mul.c,v 1.43 2025/08/14 15:15:04 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -57,6 +57,7 @@ | |||
57 | */ | 57 | */ |
58 | 58 | ||
59 | #include <assert.h> | 59 | #include <assert.h> |
60 | #include <limits.h> | ||
60 | #include <stdio.h> | 61 | #include <stdio.h> |
61 | #include <string.h> | 62 | #include <string.h> |
62 | 63 | ||
@@ -73,7 +74,7 @@ | |||
73 | */ | 74 | */ |
74 | #ifndef HAVE_BN_MUL_COMBA4 | 75 | #ifndef HAVE_BN_MUL_COMBA4 |
75 | void | 76 | void |
76 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 77 | bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) |
77 | { | 78 | { |
78 | BN_ULONG c0, c1, c2; | 79 | BN_ULONG c0, c1, c2; |
79 | 80 | ||
@@ -103,13 +104,73 @@ bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
103 | #endif | 104 | #endif |
104 | 105 | ||
105 | /* | 106 | /* |
107 | * bn_mul_comba6() computes r[] = a[] * b[] using Comba multiplication | ||
108 | * (https://everything2.com/title/Comba+multiplication), where a and b are both | ||
109 | * six word arrays, producing a 12 word array result. | ||
110 | */ | ||
111 | #ifndef HAVE_BN_MUL_COMBA6 | ||
112 | void | ||
113 | bn_mul_comba6(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) | ||
114 | { | ||
115 | BN_ULONG c0, c1, c2; | ||
116 | |||
117 | bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]); | ||
118 | |||
119 | bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0); | ||
120 | bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]); | ||
121 | |||
122 | bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0); | ||
123 | bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0); | ||
124 | bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]); | ||
125 | |||
126 | bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0); | ||
127 | bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0); | ||
128 | bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0); | ||
129 | bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]); | ||
130 | |||
131 | bn_mulw_addtw(a[4], b[0], 0, c2, c1, &c2, &c1, &c0); | ||
132 | bn_mulw_addtw(a[3], b[1], c2, c1, c0, &c2, &c1, &c0); | ||
133 | bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0); | ||
134 | bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &c0); | ||
135 | bn_mulw_addtw(a[0], b[4], c2, c1, c0, &c2, &c1, &r[4]); | ||
136 | |||
137 | bn_mulw_addtw(a[0], b[5], 0, c2, c1, &c2, &c1, &c0); | ||
138 | bn_mulw_addtw(a[1], b[4], c2, c1, c0, &c2, &c1, &c0); | ||
139 | bn_mulw_addtw(a[2], b[3], c2, c1, c0, &c2, &c1, &c0); | ||
140 | bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &c0); | ||
141 | bn_mulw_addtw(a[4], b[1], c2, c1, c0, &c2, &c1, &c0); | ||
142 | bn_mulw_addtw(a[5], b[0], c2, c1, c0, &c2, &c1, &r[5]); | ||
143 | |||
144 | bn_mulw_addtw(a[5], b[1], 0, c2, c1, &c2, &c1, &c0); | ||
145 | bn_mulw_addtw(a[4], b[2], c2, c1, c0, &c2, &c1, &c0); | ||
146 | bn_mulw_addtw(a[3], b[3], c2, c1, c0, &c2, &c1, &c0); | ||
147 | bn_mulw_addtw(a[2], b[4], c2, c1, c0, &c2, &c1, &c0); | ||
148 | bn_mulw_addtw(a[1], b[5], c2, c1, c0, &c2, &c1, &r[6]); | ||
149 | |||
150 | bn_mulw_addtw(a[2], b[5], 0, c2, c1, &c2, &c1, &c0); | ||
151 | bn_mulw_addtw(a[3], b[4], c2, c1, c0, &c2, &c1, &c0); | ||
152 | bn_mulw_addtw(a[4], b[3], c2, c1, c0, &c2, &c1, &c0); | ||
153 | bn_mulw_addtw(a[5], b[2], c2, c1, c0, &c2, &c1, &r[7]); | ||
154 | |||
155 | bn_mulw_addtw(a[5], b[3], 0, c2, c1, &c2, &c1, &c0); | ||
156 | bn_mulw_addtw(a[4], b[4], c2, c1, c0, &c2, &c1, &c0); | ||
157 | bn_mulw_addtw(a[3], b[5], c2, c1, c0, &c2, &c1, &r[8]); | ||
158 | |||
159 | bn_mulw_addtw(a[4], b[5], 0, c2, c1, &c2, &c1, &c0); | ||
160 | bn_mulw_addtw(a[5], b[4], c2, c1, c0, &c2, &c1, &r[9]); | ||
161 | |||
162 | bn_mulw_addtw(a[5], b[5], 0, c2, c1, &c2, &r[11], &r[10]); | ||
163 | } | ||
164 | #endif | ||
165 | |||
166 | /* | ||
106 | * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication | 167 | * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication |
107 | * (https://everything2.com/title/Comba+multiplication), where a and b are both | 168 | * (https://everything2.com/title/Comba+multiplication), where a and b are both |
108 | * eight word arrays, producing a 16 word array result. | 169 | * eight word arrays, producing a 16 word array result. |
109 | */ | 170 | */ |
110 | #ifndef HAVE_BN_MUL_COMBA8 | 171 | #ifndef HAVE_BN_MUL_COMBA8 |
111 | void | 172 | void |
112 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 173 | bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) |
113 | { | 174 | { |
114 | BN_ULONG c0, c1, c2; | 175 | BN_ULONG c0, c1, c2; |
115 | 176 | ||
@@ -338,14 +399,16 @@ BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | |||
338 | if (rr == NULL) | 399 | if (rr == NULL) |
339 | goto err; | 400 | goto err; |
340 | 401 | ||
341 | rn = a->top + b->top; | 402 | if (a->top > INT_MAX - b->top) |
342 | if (rn < a->top) | ||
343 | goto err; | 403 | goto err; |
404 | rn = a->top + b->top; | ||
344 | if (!bn_wexpand(rr, rn)) | 405 | if (!bn_wexpand(rr, rn)) |
345 | goto err; | 406 | goto err; |
346 | 407 | ||
347 | if (a->top == 4 && b->top == 4) { | 408 | if (a->top == 4 && b->top == 4) { |
348 | bn_mul_comba4(rr->d, a->d, b->d); | 409 | bn_mul_comba4(rr->d, a->d, b->d); |
410 | } else if (a->top == 6 && b->top == 6) { | ||
411 | bn_mul_comba6(rr->d, a->d, b->d); | ||
349 | } else if (a->top == 8 && b->top == 8) { | 412 | } else if (a->top == 8 && b->top == 8) { |
350 | bn_mul_comba8(rr->d, a->d, b->d); | 413 | bn_mul_comba8(rr->d, a->d, b->d); |
351 | } else { | 414 | } else { |
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c index 0dbccbf85d..2f7f71f819 100644 --- a/src/lib/libcrypto/bn/bn_sqr.c +++ b/src/lib/libcrypto/bn/bn_sqr.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_sqr.c,v 1.36 2023/07/08 12:21:58 beck Exp $ */ | 1 | /* $OpenBSD: bn_sqr.c,v 1.38 2025/08/14 15:15:04 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -97,6 +97,51 @@ bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | |||
97 | #endif | 97 | #endif |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * bn_sqr_comba6() computes r[] = a[] * a[] using Comba multiplication | ||
101 | * (https://everything2.com/title/Comba+multiplication), where a is a | ||
102 | * six word array, producing a 12 word array result. | ||
103 | */ | ||
104 | #ifndef HAVE_BN_SQR_COMBA6 | ||
105 | void | ||
106 | bn_sqr_comba6(BN_ULONG *r, const BN_ULONG *a) | ||
107 | { | ||
108 | BN_ULONG c2, c1, c0; | ||
109 | |||
110 | bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]); | ||
111 | |||
112 | bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]); | ||
113 | |||
114 | bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0); | ||
115 | bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]); | ||
116 | |||
117 | bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0); | ||
118 | bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]); | ||
119 | |||
120 | bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0); | ||
121 | bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &c0); | ||
122 | bn_mul2_mulw_addtw(a[4], a[0], c2, c1, c0, &c2, &c1, &r[4]); | ||
123 | |||
124 | bn_mul2_mulw_addtw(a[5], a[0], 0, c2, c1, &c2, &c1, &c0); | ||
125 | bn_mul2_mulw_addtw(a[4], a[1], c2, c1, c0, &c2, &c1, &c0); | ||
126 | bn_mul2_mulw_addtw(a[3], a[2], c2, c1, c0, &c2, &c1, &r[5]); | ||
127 | |||
128 | bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &c1, &c0); | ||
129 | bn_mul2_mulw_addtw(a[4], a[2], c2, c1, c0, &c2, &c1, &c0); | ||
130 | bn_mul2_mulw_addtw(a[5], a[1], c2, c1, c0, &c2, &c1, &r[6]); | ||
131 | |||
132 | bn_mul2_mulw_addtw(a[5], a[2], 0, c2, c1, &c2, &c1, &c0); | ||
133 | bn_mul2_mulw_addtw(a[4], a[3], c2, c1, c0, &c2, &c1, &r[7]); | ||
134 | |||
135 | bn_mulw_addtw(a[4], a[4], 0, c2, c1, &c2, &c1, &c0); | ||
136 | bn_mul2_mulw_addtw(a[5], a[3], c2, c1, c0, &c2, &c1, &r[8]); | ||
137 | |||
138 | bn_mul2_mulw_addtw(a[5], a[4], 0, c2, c1, &c2, &c1, &r[9]); | ||
139 | |||
140 | bn_mulw_addtw(a[5], a[5], 0, c2, c1, &c2, &r[11], &r[10]); | ||
141 | } | ||
142 | #endif | ||
143 | |||
144 | /* | ||
100 | * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication | 145 | * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication |
101 | * (https://everything2.com/title/Comba+multiplication), where a is an | 146 | * (https://everything2.com/title/Comba+multiplication), where a is an |
102 | * eight word array, producing a 16 word array result. | 147 | * eight word array, producing a 16 word array result. |
@@ -281,6 +326,8 @@ BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) | |||
281 | 326 | ||
282 | if (a->top == 4) { | 327 | if (a->top == 4) { |
283 | bn_sqr_comba4(rr->d, a->d); | 328 | bn_sqr_comba4(rr->d, a->d); |
329 | } else if (a->top == 6) { | ||
330 | bn_sqr_comba6(rr->d, a->d); | ||
284 | } else if (a->top == 8) { | 331 | } else if (a->top == 8) { |
285 | bn_sqr_comba8(rr->d, a->d); | 332 | bn_sqr_comba8(rr->d, a->d); |
286 | } else { | 333 | } else { |
diff --git a/src/lib/libcrypto/bn/s2n_bignum.h b/src/lib/libcrypto/bn/s2n_bignum.h index ce6e8cdc94..7d77894cdc 100644 --- a/src/lib/libcrypto/bn/s2n_bignum.h +++ b/src/lib/libcrypto/bn/s2n_bignum.h | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: s2n_bignum.h,v 1.4 2025/08/12 10:01:37 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -34,182 +36,240 @@ | |||
34 | // throughput, generally offering higher performance there. | 36 | // throughput, generally offering higher performance there. |
35 | // ---------------------------------------------------------------------------- | 37 | // ---------------------------------------------------------------------------- |
36 | 38 | ||
39 | |||
40 | #if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__) | ||
41 | #define S2N_BIGNUM_STATIC | ||
42 | #else | ||
43 | #define S2N_BIGNUM_STATIC static | ||
44 | #endif | ||
45 | |||
37 | // Add, z := x + y | 46 | // Add, z := x + y |
38 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 47 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
39 | extern uint64_t bignum_add (uint64_t p, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 48 | extern uint64_t bignum_add (uint64_t p, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
40 | 49 | ||
41 | // Add modulo p_25519, z := (x + y) mod p_25519, assuming x and y reduced | 50 | // Add modulo p_25519, z := (x + y) mod p_25519, assuming x and y reduced |
42 | // Inputs x[4], y[4]; output z[4] | 51 | // Inputs x[4], y[4]; output z[4] |
43 | extern void bignum_add_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 52 | extern void bignum_add_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
44 | 53 | ||
45 | // Add modulo p_256, z := (x + y) mod p_256, assuming x and y reduced | 54 | // Add modulo p_256, z := (x + y) mod p_256, assuming x and y reduced |
46 | // Inputs x[4], y[4]; output z[4] | 55 | // Inputs x[4], y[4]; output z[4] |
47 | extern void bignum_add_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 56 | extern void bignum_add_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
48 | 57 | ||
49 | // Add modulo p_256k1, z := (x + y) mod p_256k1, assuming x and y reduced | 58 | // Add modulo p_256k1, z := (x + y) mod p_256k1, assuming x and y reduced |
50 | // Inputs x[4], y[4]; output z[4] | 59 | // Inputs x[4], y[4]; output z[4] |
51 | extern void bignum_add_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 60 | extern void bignum_add_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
52 | 61 | ||
53 | // Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced | 62 | // Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced |
54 | // Inputs x[6], y[6]; output z[6] | 63 | // Inputs x[6], y[6]; output z[6] |
55 | extern void bignum_add_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); | 64 | extern void bignum_add_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
56 | 65 | ||
57 | // Add modulo p_521, z := (x + y) mod p_521, assuming x and y reduced | 66 | // Add modulo p_521, z := (x + y) mod p_521, assuming x and y reduced |
58 | // Inputs x[9], y[9]; output z[9] | 67 | // Inputs x[9], y[9]; output z[9] |
59 | extern void bignum_add_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 68 | extern void bignum_add_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
69 | |||
70 | // Add modulo p_sm2, z := (x + y) mod p_sm2, assuming x and y reduced | ||
71 | // Inputs x[4], y[4]; output z[4] | ||
72 | extern void bignum_add_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); | ||
60 | 73 | ||
61 | // Compute "amontification" constant z :== 2^{128k} (congruent mod m) | 74 | // Compute "amontification" constant z :== 2^{128k} (congruent mod m) |
62 | // Input m[k]; output z[k]; temporary buffer t[>=k] | 75 | // Input m[k]; output z[k]; temporary buffer t[>=k] |
63 | extern void bignum_amontifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t); | 76 | extern void bignum_amontifier (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t *t); |
64 | 77 | ||
65 | // Almost-Montgomery multiply, z :== (x * y / 2^{64k}) (congruent mod m) | 78 | // Almost-Montgomery multiply, z :== (x * y / 2^{64k}) (congruent mod m) |
66 | // Inputs x[k], y[k], m[k]; output z[k] | 79 | // Inputs x[k], y[k], m[k]; output z[k] |
67 | extern void bignum_amontmul (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); | 80 | extern void bignum_amontmul (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m); |
68 | 81 | ||
69 | // Almost-Montgomery reduce, z :== (x' / 2^{64p}) (congruent mod m) | 82 | // Almost-Montgomery reduce, z :== (x' / 2^{64p}) (congruent mod m) |
70 | // Inputs x[n], m[k], p; output z[k] | 83 | // Inputs x[n], m[k], p; output z[k] |
71 | extern void bignum_amontredc (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t *m, uint64_t p); | 84 | extern void bignum_amontredc (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, const uint64_t *m, uint64_t p); |
72 | 85 | ||
73 | // Almost-Montgomery square, z :== (x^2 / 2^{64k}) (congruent mod m) | 86 | // Almost-Montgomery square, z :== (x^2 / 2^{64k}) (congruent mod m) |
74 | // Inputs x[k], m[k]; output z[k] | 87 | // Inputs x[k], m[k]; output z[k] |
75 | extern void bignum_amontsqr (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); | 88 | extern void bignum_amontsqr (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m); |
76 | 89 | ||
77 | // Convert 4-digit (256-bit) bignum to/from big-endian form | 90 | // Convert 4-digit (256-bit) bignum to/from big-endian form |
78 | // Input x[4]; output z[4] | 91 | // Input x[4]; output z[4] |
79 | extern void bignum_bigendian_4 (uint64_t z[static 4], uint64_t x[static 4]); | 92 | extern void bignum_bigendian_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
80 | 93 | ||
81 | // Convert 6-digit (384-bit) bignum to/from big-endian form | 94 | // Convert 6-digit (384-bit) bignum to/from big-endian form |
82 | // Input x[6]; output z[6] | 95 | // Input x[6]; output z[6] |
83 | extern void bignum_bigendian_6 (uint64_t z[static 6], uint64_t x[static 6]); | 96 | extern void bignum_bigendian_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
84 | 97 | ||
85 | // Select bitfield starting at bit n with length l <= 64 | 98 | // Select bitfield starting at bit n with length l <= 64 |
86 | // Inputs x[k], n, l; output function return | 99 | // Inputs x[k], n, l; output function return |
87 | extern uint64_t bignum_bitfield (uint64_t k, uint64_t *x, uint64_t n, uint64_t l); | 100 | extern uint64_t bignum_bitfield (uint64_t k, const uint64_t *x, uint64_t n, uint64_t l); |
88 | 101 | ||
89 | // Return size of bignum in bits | 102 | // Return size of bignum in bits |
90 | // Input x[k]; output function return | 103 | // Input x[k]; output function return |
91 | extern uint64_t bignum_bitsize (uint64_t k, uint64_t *x); | 104 | extern uint64_t bignum_bitsize (uint64_t k, const uint64_t *x); |
92 | 105 | ||
93 | // Divide by a single (nonzero) word, z := x / m and return x mod m | 106 | // Divide by a single (nonzero) word, z := x / m and return x mod m |
94 | // Inputs x[n], m; outputs function return (remainder) and z[k] | 107 | // Inputs x[n], m; outputs function return (remainder) and z[k] |
95 | extern uint64_t bignum_cdiv (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t m); | 108 | extern uint64_t bignum_cdiv (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t m); |
96 | 109 | ||
97 | // Divide by a single word, z := x / m when known to be exact | 110 | // Divide by a single word, z := x / m when known to be exact |
98 | // Inputs x[n], m; output z[k] | 111 | // Inputs x[n], m; output z[k] |
99 | extern void bignum_cdiv_exact (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t m); | 112 | extern void bignum_cdiv_exact (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t m); |
100 | 113 | ||
101 | // Count leading zero digits (64-bit words) | 114 | // Count leading zero digits (64-bit words) |
102 | // Input x[k]; output function return | 115 | // Input x[k]; output function return |
103 | extern uint64_t bignum_cld (uint64_t k, uint64_t *x); | 116 | extern uint64_t bignum_cld (uint64_t k, const uint64_t *x); |
104 | 117 | ||
105 | // Count leading zero bits | 118 | // Count leading zero bits |
106 | // Input x[k]; output function return | 119 | // Input x[k]; output function return |
107 | extern uint64_t bignum_clz (uint64_t k, uint64_t *x); | 120 | extern uint64_t bignum_clz (uint64_t k, const uint64_t *x); |
108 | 121 | ||
109 | // Multiply-add with single-word multiplier, z := z + c * y | 122 | // Multiply-add with single-word multiplier, z := z + c * y |
110 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 123 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
111 | extern uint64_t bignum_cmadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 124 | extern uint64_t bignum_cmadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, const uint64_t *y); |
112 | 125 | ||
113 | // Negated multiply-add with single-word multiplier, z := z - c * y | 126 | // Negated multiply-add with single-word multiplier, z := z - c * y |
114 | // Inputs c, y[n]; outputs function return (negative carry-out) and z[k] | 127 | // Inputs c, y[n]; outputs function return (negative carry-out) and z[k] |
115 | extern uint64_t bignum_cmnegadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 128 | extern uint64_t bignum_cmnegadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, const uint64_t *y); |
116 | 129 | ||
117 | // Find modulus of bignum w.r.t. single nonzero word m, returning x mod m | 130 | // Find modulus of bignum w.r.t. single nonzero word m, returning x mod m |
118 | // Inputs x[k], m; output function return | 131 | // Inputs x[k], m; output function return |
119 | extern uint64_t bignum_cmod (uint64_t k, uint64_t *x, uint64_t m); | 132 | extern uint64_t bignum_cmod (uint64_t k, const uint64_t *x, uint64_t m); |
120 | 133 | ||
121 | // Multiply by a single word, z := c * y | 134 | // Multiply by a single word, z := c * y |
122 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 135 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
123 | extern uint64_t bignum_cmul (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 136 | extern uint64_t bignum_cmul (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, const uint64_t *y); |
124 | 137 | ||
125 | // Multiply by a single word modulo p_25519, z := (c * x) mod p_25519, assuming x reduced | 138 | // Multiply by a single word modulo p_25519, z := (c * x) mod p_25519, assuming x reduced |
126 | // Inputs c, x[4]; output z[4] | 139 | // Inputs c, x[4]; output z[4] |
127 | extern void bignum_cmul_p25519 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 140 | extern void bignum_cmul_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
128 | extern void bignum_cmul_p25519_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 141 | extern void bignum_cmul_p25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
129 | 142 | ||
130 | // Multiply by a single word modulo p_256, z := (c * x) mod p_256, assuming x reduced | 143 | // Multiply by a single word modulo p_256, z := (c * x) mod p_256, assuming x reduced |
131 | // Inputs c, x[4]; output z[4] | 144 | // Inputs c, x[4]; output z[4] |
132 | extern void bignum_cmul_p256 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 145 | extern void bignum_cmul_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
133 | extern void bignum_cmul_p256_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 146 | extern void bignum_cmul_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
134 | 147 | ||
135 | // Multiply by a single word modulo p_256k1, z := (c * x) mod p_256k1, assuming x reduced | 148 | // Multiply by a single word modulo p_256k1, z := (c * x) mod p_256k1, assuming x reduced |
136 | // Inputs c, x[4]; output z[4] | 149 | // Inputs c, x[4]; output z[4] |
137 | extern void bignum_cmul_p256k1 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 150 | extern void bignum_cmul_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
138 | extern void bignum_cmul_p256k1_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 151 | extern void bignum_cmul_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
139 | 152 | ||
140 | // Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming x reduced | 153 | // Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming x reduced |
141 | // Inputs c, x[6]; output z[6] | 154 | // Inputs c, x[6]; output z[6] |
142 | extern void bignum_cmul_p384 (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]); | 155 | extern void bignum_cmul_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 6]); |
143 | extern void bignum_cmul_p384_alt (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]); | 156 | extern void bignum_cmul_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 6]); |
144 | 157 | ||
145 | // Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming x reduced | 158 | // Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming x reduced |
146 | // Inputs c, x[9]; output z[9] | 159 | // Inputs c, x[9]; output z[9] |
147 | extern void bignum_cmul_p521 (uint64_t z[static 9], uint64_t c, uint64_t x[static 9]); | 160 | extern void bignum_cmul_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 9]); |
148 | extern void bignum_cmul_p521_alt (uint64_t z[static 9], uint64_t c, uint64_t x[static 9]); | 161 | extern void bignum_cmul_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 9]); |
162 | |||
163 | // Multiply by a single word modulo p_sm2, z := (c * x) mod p_sm2, assuming x reduced | ||
164 | // Inputs c, x[4]; output z[4] | ||
165 | extern void bignum_cmul_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
166 | extern void bignum_cmul_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
149 | 167 | ||
150 | // Test bignums for coprimality, gcd(x,y) = 1 | 168 | // Test bignums for coprimality, gcd(x,y) = 1 |
151 | // Inputs x[m], y[n]; output function return; temporary buffer t[>=2*max(m,n)] | 169 | // Inputs x[m], y[n]; output function return; temporary buffer t[>=2*max(m,n)] |
152 | extern uint64_t bignum_coprime (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y, uint64_t *t); | 170 | extern uint64_t bignum_coprime (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y, uint64_t *t); |
153 | 171 | ||
154 | // Copy bignum with zero-extension or truncation, z := x | 172 | // Copy bignum with zero-extension or truncation, z := x |
155 | // Input x[n]; output z[k] | 173 | // Input x[n]; output z[k] |
156 | extern void bignum_copy (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); | 174 | extern void bignum_copy (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x); |
175 | |||
176 | // Given table: uint64_t[height*width], copy table[idx*width...(idx+1)*width-1] | ||
177 | // into z[0..width-1]. | ||
178 | // This function is constant-time with respect to the value of `idx`. This is | ||
179 | // achieved by reading the whole table and using the bit-masking to get the | ||
180 | // `idx`-th row. | ||
181 | // Input table[height*width]; output z[width] | ||
182 | extern void bignum_copy_row_from_table (uint64_t *z, const uint64_t *table, uint64_t height, | ||
183 | uint64_t width, uint64_t idx); | ||
184 | |||
185 | // Given table: uint64_t[height*width], copy table[idx*width...(idx+1)*width-1] | ||
186 | // into z[0..width-1]. width must be a multiple of 8. | ||
187 | // This function is constant-time with respect to the value of `idx`. This is | ||
188 | // achieved by reading the whole table and using the bit-masking to get the | ||
189 | // `idx`-th row. | ||
190 | // Input table[height*width]; output z[width] | ||
191 | extern void bignum_copy_row_from_table_8n (uint64_t *z, const uint64_t *table, | ||
192 | uint64_t height, uint64_t width, uint64_t idx); | ||
193 | |||
194 | // Given table: uint64_t[height*16], copy table[idx*16...(idx+1)*16-1] into z[0..15]. | ||
195 | // This function is constant-time with respect to the value of `idx`. This is | ||
196 | // achieved by reading the whole table and using the bit-masking to get the | ||
197 | // `idx`-th row. | ||
198 | // Input table[height*16]; output z[16] | ||
199 | extern void bignum_copy_row_from_table_16 (uint64_t *z, const uint64_t *table, | ||
200 | uint64_t height, uint64_t idx); | ||
201 | |||
202 | // Given table: uint64_t[height*32], copy table[idx*32...(idx+1)*32-1] into z[0..31]. | ||
203 | // This function is constant-time with respect to the value of `idx`. This is | ||
204 | // achieved by reading the whole table and using the bit-masking to get the | ||
205 | // `idx`-th row. | ||
206 | // Input table[height*32]; output z[32] | ||
207 | extern void bignum_copy_row_from_table_32 (uint64_t *z, const uint64_t *table, | ||
208 | uint64_t height, uint64_t idx); | ||
157 | 209 | ||
158 | // Count trailing zero digits (64-bit words) | 210 | // Count trailing zero digits (64-bit words) |
159 | // Input x[k]; output function return | 211 | // Input x[k]; output function return |
160 | extern uint64_t bignum_ctd (uint64_t k, uint64_t *x); | 212 | extern uint64_t bignum_ctd (uint64_t k, const uint64_t *x); |
161 | 213 | ||
162 | // Count trailing zero bits | 214 | // Count trailing zero bits |
163 | // Input x[k]; output function return | 215 | // Input x[k]; output function return |
164 | extern uint64_t bignum_ctz (uint64_t k, uint64_t *x); | 216 | extern uint64_t bignum_ctz (uint64_t k, const uint64_t *x); |
165 | 217 | ||
166 | // Convert from almost-Montgomery form, z := (x / 2^256) mod p_256 | 218 | // Convert from almost-Montgomery form, z := (x / 2^256) mod p_256 |
167 | // Input x[4]; output z[4] | 219 | // Input x[4]; output z[4] |
168 | extern void bignum_deamont_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 220 | extern void bignum_deamont_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
169 | extern void bignum_deamont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); | 221 | extern void bignum_deamont_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
170 | 222 | ||
171 | // Convert from almost-Montgomery form, z := (x / 2^256) mod p_256k1 | 223 | // Convert from almost-Montgomery form, z := (x / 2^256) mod p_256k1 |
172 | // Input x[4]; output z[4] | 224 | // Input x[4]; output z[4] |
173 | extern void bignum_deamont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 225 | extern void bignum_deamont_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
174 | 226 | ||
175 | // Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 | 227 | // Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 |
176 | // Input x[6]; output z[6] | 228 | // Input x[6]; output z[6] |
177 | extern void bignum_deamont_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 229 | extern void bignum_deamont_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
178 | extern void bignum_deamont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); | 230 | extern void bignum_deamont_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
179 | 231 | ||
180 | // Convert from almost-Montgomery form z := (x / 2^576) mod p_521 | 232 | // Convert from almost-Montgomery form z := (x / 2^576) mod p_521 |
181 | // Input x[9]; output z[9] | 233 | // Input x[9]; output z[9] |
182 | extern void bignum_deamont_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 234 | extern void bignum_deamont_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
235 | |||
236 | // Convert from almost-Montgomery form z := (x / 2^256) mod p_sm2 | ||
237 | // Input x[4]; output z[4] | ||
238 | extern void bignum_deamont_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
183 | 239 | ||
184 | // Convert from (almost-)Montgomery form z := (x / 2^{64k}) mod m | 240 | // Convert from (almost-)Montgomery form z := (x / 2^{64k}) mod m |
185 | // Inputs x[k], m[k]; output z[k] | 241 | // Inputs x[k], m[k]; output z[k] |
186 | extern void bignum_demont (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); | 242 | extern void bignum_demont (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m); |
187 | 243 | ||
188 | // Convert from Montgomery form z := (x / 2^256) mod p_256, assuming x reduced | 244 | // Convert from Montgomery form z := (x / 2^256) mod p_256, assuming x reduced |
189 | // Input x[4]; output z[4] | 245 | // Input x[4]; output z[4] |
190 | extern void bignum_demont_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 246 | extern void bignum_demont_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
191 | extern void bignum_demont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); | 247 | extern void bignum_demont_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
192 | 248 | ||
193 | // Convert from Montgomery form z := (x / 2^256) mod p_256k1, assuming x reduced | 249 | // Convert from Montgomery form z := (x / 2^256) mod p_256k1, assuming x reduced |
194 | // Input x[4]; output z[4] | 250 | // Input x[4]; output z[4] |
195 | extern void bignum_demont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 251 | extern void bignum_demont_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
196 | 252 | ||
197 | // Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced | 253 | // Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced |
198 | // Input x[6]; output z[6] | 254 | // Input x[6]; output z[6] |
199 | extern void bignum_demont_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 255 | extern void bignum_demont_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
200 | extern void bignum_demont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); | 256 | extern void bignum_demont_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
201 | 257 | ||
202 | // Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced | 258 | // Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced |
203 | // Input x[9]; output z[9] | 259 | // Input x[9]; output z[9] |
204 | extern void bignum_demont_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 260 | extern void bignum_demont_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
261 | |||
262 | // Convert from Montgomery form z := (x / 2^256) mod p_sm2, assuming x reduced | ||
263 | // Input x[4]; output z[4] | ||
264 | extern void bignum_demont_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
205 | 265 | ||
206 | // Select digit x[n] | 266 | // Select digit x[n] |
207 | // Inputs x[k], n; output function return | 267 | // Inputs x[k], n; output function return |
208 | extern uint64_t bignum_digit (uint64_t k, uint64_t *x, uint64_t n); | 268 | extern uint64_t bignum_digit (uint64_t k, const uint64_t *x, uint64_t n); |
209 | 269 | ||
210 | // Return size of bignum in digits (64-bit word) | 270 | // Return size of bignum in digits (64-bit word) |
211 | // Input x[k]; output function return | 271 | // Input x[k]; output function return |
212 | extern uint64_t bignum_digitsize (uint64_t k, uint64_t *x); | 272 | extern uint64_t bignum_digitsize (uint64_t k, const uint64_t *x); |
213 | 273 | ||
214 | // Divide bignum by 10: z' := z div 10, returning remainder z mod 10 | 274 | // Divide bignum by 10: z' := z div 10, returning remainder z mod 10 |
215 | // Input z[k]; outputs function return (remainder) and z[k] | 275 | // Input z[k]; outputs function return (remainder) and z[k] |
@@ -217,294 +277,391 @@ extern uint64_t bignum_divmod10 (uint64_t k, uint64_t *z); | |||
217 | 277 | ||
218 | // Double modulo p_25519, z := (2 * x) mod p_25519, assuming x reduced | 278 | // Double modulo p_25519, z := (2 * x) mod p_25519, assuming x reduced |
219 | // Input x[4]; output z[4] | 279 | // Input x[4]; output z[4] |
220 | extern void bignum_double_p25519 (uint64_t z[static 4], uint64_t x[static 4]); | 280 | extern void bignum_double_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
221 | 281 | ||
222 | // Double modulo p_256, z := (2 * x) mod p_256, assuming x reduced | 282 | // Double modulo p_256, z := (2 * x) mod p_256, assuming x reduced |
223 | // Input x[4]; output z[4] | 283 | // Input x[4]; output z[4] |
224 | extern void bignum_double_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 284 | extern void bignum_double_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
225 | 285 | ||
226 | // Double modulo p_256k1, z := (2 * x) mod p_256k1, assuming x reduced | 286 | // Double modulo p_256k1, z := (2 * x) mod p_256k1, assuming x reduced |
227 | // Input x[4]; output z[4] | 287 | // Input x[4]; output z[4] |
228 | extern void bignum_double_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 288 | extern void bignum_double_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
229 | 289 | ||
230 | // Double modulo p_384, z := (2 * x) mod p_384, assuming x reduced | 290 | // Double modulo p_384, z := (2 * x) mod p_384, assuming x reduced |
231 | // Input x[6]; output z[6] | 291 | // Input x[6]; output z[6] |
232 | extern void bignum_double_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 292 | extern void bignum_double_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
233 | 293 | ||
234 | // Double modulo p_521, z := (2 * x) mod p_521, assuming x reduced | 294 | // Double modulo p_521, z := (2 * x) mod p_521, assuming x reduced |
235 | // Input x[9]; output z[9] | 295 | // Input x[9]; output z[9] |
236 | extern void bignum_double_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 296 | extern void bignum_double_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
297 | |||
298 | // Double modulo p_sm2, z := (2 * x) mod p_sm2, assuming x reduced | ||
299 | // Input x[4]; output z[4] | ||
300 | extern void bignum_double_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
237 | 301 | ||
238 | // Extended Montgomery reduce, returning results in input-output buffer | 302 | // Extended Montgomery reduce, returning results in input-output buffer |
239 | // Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] | 303 | // Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] |
240 | extern uint64_t bignum_emontredc (uint64_t k, uint64_t *z, uint64_t *m, uint64_t w); | 304 | extern uint64_t bignum_emontredc (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t w); |
241 | 305 | ||
242 | // Extended Montgomery reduce in 8-digit blocks, results in input-output buffer | 306 | // Extended Montgomery reduce in 8-digit blocks, results in input-output buffer |
243 | // Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] | 307 | // Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] |
244 | extern uint64_t bignum_emontredc_8n (uint64_t k, uint64_t *z, uint64_t *m, uint64_t w); | 308 | extern uint64_t bignum_emontredc_8n (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t w); |
309 | // Variant of bignum_emontredc_8n with a temporary buffer; inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] | ||
310 | // Temporary buffer m_precalc[12*(k/4-1)] | ||
311 | extern uint64_t bignum_emontredc_8n_cdiff (uint64_t k, uint64_t *z, const uint64_t *m, | ||
312 | uint64_t w, uint64_t *m_precalc); | ||
245 | 313 | ||
246 | // Test bignums for equality, x = y | 314 | // Test bignums for equality, x = y |
247 | // Inputs x[m], y[n]; output function return | 315 | // Inputs x[m], y[n]; output function return |
248 | extern uint64_t bignum_eq (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 316 | extern uint64_t bignum_eq (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
249 | 317 | ||
250 | // Test bignum for even-ness | 318 | // Test bignum for even-ness |
251 | // Input x[k]; output function return | 319 | // Input x[k]; output function return |
252 | extern uint64_t bignum_even (uint64_t k, uint64_t *x); | 320 | extern uint64_t bignum_even (uint64_t k, const uint64_t *x); |
253 | 321 | ||
254 | // Convert 4-digit (256-bit) bignum from big-endian bytes | 322 | // Convert 4-digit (256-bit) bignum from big-endian bytes |
255 | // Input x[32] (bytes); output z[4] | 323 | // Input x[32] (bytes); output z[4] |
256 | extern void bignum_frombebytes_4 (uint64_t z[static 4], uint8_t x[static 32]); | 324 | extern void bignum_frombebytes_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint8_t x[S2N_BIGNUM_STATIC 32]); |
257 | 325 | ||
258 | // Convert 6-digit (384-bit) bignum from big-endian bytes | 326 | // Convert 6-digit (384-bit) bignum from big-endian bytes |
259 | // Input x[48] (bytes); output z[6] | 327 | // Input x[48] (bytes); output z[6] |
260 | extern void bignum_frombebytes_6 (uint64_t z[static 6], uint8_t x[static 48]); | 328 | extern void bignum_frombebytes_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint8_t x[S2N_BIGNUM_STATIC 48]); |
261 | 329 | ||
262 | // Convert 4-digit (256-bit) bignum from little-endian bytes | 330 | // Convert 4-digit (256-bit) bignum from little-endian bytes |
263 | // Input x[32] (bytes); output z[4] | 331 | // Input x[32] (bytes); output z[4] |
264 | extern void bignum_fromlebytes_4 (uint64_t z[static 4], uint8_t x[static 32]); | 332 | extern void bignum_fromlebytes_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint8_t x[S2N_BIGNUM_STATIC 32]); |
265 | 333 | ||
266 | // Convert 6-digit (384-bit) bignum from little-endian bytes | 334 | // Convert 6-digit (384-bit) bignum from little-endian bytes |
267 | // Input x[48] (bytes); output z[6] | 335 | // Input x[48] (bytes); output z[6] |
268 | extern void bignum_fromlebytes_6 (uint64_t z[static 6], uint8_t x[static 48]); | 336 | extern void bignum_fromlebytes_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint8_t x[S2N_BIGNUM_STATIC 48]); |
269 | 337 | ||
270 | // Convert little-endian bytes to 9-digit 528-bit bignum | 338 | // Convert little-endian bytes to 9-digit 528-bit bignum |
271 | // Input x[66] (bytes); output z[9] | 339 | // Input x[66] (bytes); output z[9] |
272 | extern void bignum_fromlebytes_p521 (uint64_t z[static 9],uint8_t x[static 66]); | 340 | extern void bignum_fromlebytes_p521 (uint64_t z[S2N_BIGNUM_STATIC 9],const uint8_t x[S2N_BIGNUM_STATIC 66]); |
273 | 341 | ||
274 | // Compare bignums, x >= y | 342 | // Compare bignums, x >= y |
275 | // Inputs x[m], y[n]; output function return | 343 | // Inputs x[m], y[n]; output function return |
276 | extern uint64_t bignum_ge (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 344 | extern uint64_t bignum_ge (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
277 | 345 | ||
278 | // Compare bignums, x > y | 346 | // Compare bignums, x > y |
279 | // Inputs x[m], y[n]; output function return | 347 | // Inputs x[m], y[n]; output function return |
280 | extern uint64_t bignum_gt (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 348 | extern uint64_t bignum_gt (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
281 | 349 | ||
282 | // Halve modulo p_256, z := (x / 2) mod p_256, assuming x reduced | 350 | // Halve modulo p_256, z := (x / 2) mod p_256, assuming x reduced |
283 | // Input x[4]; output z[4] | 351 | // Input x[4]; output z[4] |
284 | extern void bignum_half_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 352 | extern void bignum_half_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
285 | 353 | ||
286 | // Halve modulo p_256k1, z := (x / 2) mod p_256k1, assuming x reduced | 354 | // Halve modulo p_256k1, z := (x / 2) mod p_256k1, assuming x reduced |
287 | // Input x[4]; output z[4] | 355 | // Input x[4]; output z[4] |
288 | extern void bignum_half_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 356 | extern void bignum_half_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
289 | 357 | ||
290 | // Halve modulo p_384, z := (x / 2) mod p_384, assuming x reduced | 358 | // Halve modulo p_384, z := (x / 2) mod p_384, assuming x reduced |
291 | // Input x[6]; output z[6] | 359 | // Input x[6]; output z[6] |
292 | extern void bignum_half_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 360 | extern void bignum_half_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
293 | 361 | ||
294 | // Halve modulo p_521, z := (x / 2) mod p_521, assuming x reduced | 362 | // Halve modulo p_521, z := (x / 2) mod p_521, assuming x reduced |
295 | // Input x[9]; output z[9] | 363 | // Input x[9]; output z[9] |
296 | extern void bignum_half_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 364 | extern void bignum_half_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
365 | |||
366 | // Halve modulo p_sm2, z := (x / 2) mod p_sm2, assuming x reduced | ||
367 | // Input x[4]; output z[4] | ||
368 | extern void bignum_half_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
369 | |||
370 | // Modular inverse modulo p_25519 = 2^255 - 19 | ||
371 | // Input x[4]; output z[4] | ||
372 | extern void bignum_inv_p25519(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
373 | |||
374 | // Modular inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1 | ||
375 | // Input x[4]; output z[4] | ||
376 | extern void bignum_inv_p256(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
377 | |||
378 | // Modular inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1 | ||
379 | // Input x[6]; output z[6] | ||
380 | extern void bignum_inv_p384(uint64_t z[S2N_BIGNUM_STATIC 6],const uint64_t x[S2N_BIGNUM_STATIC 6]); | ||
381 | |||
382 | // Modular inverse modulo p_521 = 2^521 - 1 | ||
383 | // Input x[9]; output z[9] | ||
384 | extern void bignum_inv_p521(uint64_t z[S2N_BIGNUM_STATIC 9],const uint64_t x[S2N_BIGNUM_STATIC 9]); | ||
385 | |||
386 | // Modular inverse modulo p_sm2 = 2^256 - 2^224 - 2^96 + 2^64 - 1 | ||
387 | // Input x[4]; output z[4] | ||
388 | extern void bignum_inv_sm2(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
389 | |||
390 | // Inverse square root modulo p_25519 | ||
391 | // Input x[4]; output function return (Legendre symbol) and z[4] | ||
392 | extern int64_t bignum_invsqrt_p25519(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
393 | extern int64_t bignum_invsqrt_p25519_alt(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
297 | 394 | ||
298 | // Test bignum for zero-ness, x = 0 | 395 | // Test bignum for zero-ness, x = 0 |
299 | // Input x[k]; output function return | 396 | // Input x[k]; output function return |
300 | extern uint64_t bignum_iszero (uint64_t k, uint64_t *x); | 397 | extern uint64_t bignum_iszero (uint64_t k, const uint64_t *x); |
301 | 398 | ||
302 | // Multiply z := x * y | 399 | // Multiply z := x * y |
303 | // Inputs x[16], y[16]; output z[32]; temporary buffer t[>=32] | 400 | // Inputs x[16], y[16]; output z[32]; temporary buffer t[>=32] |
304 | extern void bignum_kmul_16_32 (uint64_t z[static 32], uint64_t x[static 16], uint64_t y[static 16], uint64_t t[static 32]); | 401 | extern void bignum_kmul_16_32 (uint64_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 16], const uint64_t y[S2N_BIGNUM_STATIC 16], uint64_t t[S2N_BIGNUM_STATIC 32]); |
305 | 402 | ||
306 | // Multiply z := x * y | 403 | // Multiply z := x * y |
307 | // Inputs x[32], y[32]; output z[64]; temporary buffer t[>=96] | 404 | // Inputs x[32], y[32]; output z[64]; temporary buffer t[>=96] |
308 | extern void bignum_kmul_32_64 (uint64_t z[static 64], uint64_t x[static 32], uint64_t y[static 32], uint64_t t[static 96]); | 405 | extern void bignum_kmul_32_64 (uint64_t z[S2N_BIGNUM_STATIC 64], const uint64_t x[S2N_BIGNUM_STATIC 32], const uint64_t y[S2N_BIGNUM_STATIC 32], uint64_t t[S2N_BIGNUM_STATIC 96]); |
309 | 406 | ||
310 | // Square, z := x^2 | 407 | // Square, z := x^2 |
311 | // Input x[16]; output z[32]; temporary buffer t[>=24] | 408 | // Input x[16]; output z[32]; temporary buffer t[>=24] |
312 | extern void bignum_ksqr_16_32 (uint64_t z[static 32], uint64_t x[static 16], uint64_t t[static 24]); | 409 | extern void bignum_ksqr_16_32 (uint64_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 16], uint64_t t[S2N_BIGNUM_STATIC 24]); |
313 | 410 | ||
314 | // Square, z := x^2 | 411 | // Square, z := x^2 |
315 | // Input x[32]; output z[64]; temporary buffer t[>=72] | 412 | // Input x[32]; output z[64]; temporary buffer t[>=72] |
316 | extern void bignum_ksqr_32_64 (uint64_t z[static 64], uint64_t x[static 32], uint64_t t[static 72]); | 413 | extern void bignum_ksqr_32_64 (uint64_t z[S2N_BIGNUM_STATIC 64], const uint64_t x[S2N_BIGNUM_STATIC 32], uint64_t t[S2N_BIGNUM_STATIC 72]); |
317 | 414 | ||
318 | // Compare bignums, x <= y | 415 | // Compare bignums, x <= y |
319 | // Inputs x[m], y[n]; output function return | 416 | // Inputs x[m], y[n]; output function return |
320 | extern uint64_t bignum_le (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 417 | extern uint64_t bignum_le (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
321 | 418 | ||
322 | // Convert 4-digit (256-bit) bignum to/from little-endian form | 419 | // Convert 4-digit (256-bit) bignum to/from little-endian form |
323 | // Input x[4]; output z[4] | 420 | // Input x[4]; output z[4] |
324 | extern void bignum_littleendian_4 (uint64_t z[static 4], uint64_t x[static 4]); | 421 | extern void bignum_littleendian_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
325 | 422 | ||
326 | // Convert 6-digit (384-bit) bignum to/from little-endian form | 423 | // Convert 6-digit (384-bit) bignum to/from little-endian form |
327 | // Input x[6]; output z[6] | 424 | // Input x[6]; output z[6] |
328 | extern void bignum_littleendian_6 (uint64_t z[static 6], uint64_t x[static 6]); | 425 | extern void bignum_littleendian_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
329 | 426 | ||
330 | // Compare bignums, x < y | 427 | // Compare bignums, x < y |
331 | // Inputs x[m], y[n]; output function return | 428 | // Inputs x[m], y[n]; output function return |
332 | extern uint64_t bignum_lt (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 429 | extern uint64_t bignum_lt (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
333 | 430 | ||
334 | // Multiply-add, z := z + x * y | 431 | // Multiply-add, z := z + x * y |
335 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[k] | 432 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[k] |
336 | extern uint64_t bignum_madd (uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 433 | extern uint64_t bignum_madd (uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
434 | |||
435 | // Multiply-add modulo the order of the curve25519/edwards25519 basepoint | ||
436 | // Inputs x[4], y[4], c[4]; output z[4] | ||
437 | extern void bignum_madd_n25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4], const uint64_t c[S2N_BIGNUM_STATIC 4]); | ||
438 | extern void bignum_madd_n25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4], const uint64_t c[S2N_BIGNUM_STATIC 4]); | ||
439 | |||
440 | // Reduce modulo group order, z := x mod m_25519 | ||
441 | // Input x[4]; output z[4] | ||
442 | extern void bignum_mod_m25519_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
443 | |||
444 | // Reduce modulo basepoint order, z := x mod n_25519 | ||
445 | // Input x[k]; output z[4] | ||
446 | extern void bignum_mod_n25519 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); | ||
447 | |||
448 | // Reduce modulo basepoint order, z := x mod n_25519 | ||
449 | // Input x[4]; output z[4] | ||
450 | extern void bignum_mod_n25519_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
337 | 451 | ||
338 | // Reduce modulo group order, z := x mod n_256 | 452 | // Reduce modulo group order, z := x mod n_256 |
339 | // Input x[k]; output z[4] | 453 | // Input x[k]; output z[4] |
340 | extern void bignum_mod_n256 (uint64_t z[static 4], uint64_t k, uint64_t *x); | 454 | extern void bignum_mod_n256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); |
341 | extern void bignum_mod_n256_alt (uint64_t z[static 4], uint64_t k, uint64_t *x); | 455 | extern void bignum_mod_n256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); |
342 | 456 | ||
343 | // Reduce modulo group order, z := x mod n_256 | 457 | // Reduce modulo group order, z := x mod n_256 |
344 | // Input x[4]; output z[4] | 458 | // Input x[4]; output z[4] |
345 | extern void bignum_mod_n256_4 (uint64_t z[static 4], uint64_t x[static 4]); | 459 | extern void bignum_mod_n256_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
346 | 460 | ||
347 | // Reduce modulo group order, z := x mod n_256k1 | 461 | // Reduce modulo group order, z := x mod n_256k1 |
348 | // Input x[4]; output z[4] | 462 | // Input x[4]; output z[4] |
349 | extern void bignum_mod_n256k1_4 (uint64_t z[static 4], uint64_t x[static 4]); | 463 | extern void bignum_mod_n256k1_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
350 | 464 | ||
351 | // Reduce modulo group order, z := x mod n_384 | 465 | // Reduce modulo group order, z := x mod n_384 |
352 | // Input x[k]; output z[6] | 466 | // Input x[k]; output z[6] |
353 | extern void bignum_mod_n384 (uint64_t z[static 6], uint64_t k, uint64_t *x); | 467 | extern void bignum_mod_n384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x); |
354 | extern void bignum_mod_n384_alt (uint64_t z[static 6], uint64_t k, uint64_t *x); | 468 | extern void bignum_mod_n384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x); |
355 | 469 | ||
356 | // Reduce modulo group order, z := x mod n_384 | 470 | // Reduce modulo group order, z := x mod n_384 |
357 | // Input x[6]; output z[6] | 471 | // Input x[6]; output z[6] |
358 | extern void bignum_mod_n384_6 (uint64_t z[static 6], uint64_t x[static 6]); | 472 | extern void bignum_mod_n384_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
359 | 473 | ||
360 | // Reduce modulo group order, z := x mod n_521 | 474 | // Reduce modulo group order, z := x mod n_521 |
361 | // Input x[9]; output z[9] | 475 | // Input x[9]; output z[9] |
362 | extern void bignum_mod_n521_9 (uint64_t z[static 9], uint64_t x[static 9]); | 476 | extern void bignum_mod_n521_9 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
363 | extern void bignum_mod_n521_9_alt (uint64_t z[static 9], uint64_t x[static 9]); | 477 | extern void bignum_mod_n521_9_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
478 | |||
479 | // Reduce modulo group order, z := x mod n_sm2 | ||
480 | // Input x[k]; output z[4] | ||
481 | extern void bignum_mod_nsm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); | ||
482 | extern void bignum_mod_nsm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); | ||
483 | |||
484 | // Reduce modulo group order, z := x mod n_sm2 | ||
485 | // Input x[4]; output z[4] | ||
486 | extern void bignum_mod_nsm2_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
364 | 487 | ||
365 | // Reduce modulo field characteristic, z := x mod p_25519 | 488 | // Reduce modulo field characteristic, z := x mod p_25519 |
366 | // Input x[4]; output z[4] | 489 | // Input x[4]; output z[4] |
367 | extern void bignum_mod_p25519_4 (uint64_t z[static 4], uint64_t x[static 4]); | 490 | extern void bignum_mod_p25519_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
368 | 491 | ||
369 | // Reduce modulo field characteristic, z := x mod p_256 | 492 | // Reduce modulo field characteristic, z := x mod p_256 |
370 | // Input x[k]; output z[4] | 493 | // Input x[k]; output z[4] |
371 | extern void bignum_mod_p256 (uint64_t z[static 4], uint64_t k, uint64_t *x); | 494 | extern void bignum_mod_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); |
372 | extern void bignum_mod_p256_alt (uint64_t z[static 4], uint64_t k, uint64_t *x); | 495 | extern void bignum_mod_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); |
373 | 496 | ||
374 | // Reduce modulo field characteristic, z := x mod p_256 | 497 | // Reduce modulo field characteristic, z := x mod p_256 |
375 | // Input x[4]; output z[4] | 498 | // Input x[4]; output z[4] |
376 | extern void bignum_mod_p256_4 (uint64_t z[static 4], uint64_t x[static 4]); | 499 | extern void bignum_mod_p256_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
377 | 500 | ||
378 | // Reduce modulo field characteristic, z := x mod p_256k1 | 501 | // Reduce modulo field characteristic, z := x mod p_256k1 |
379 | // Input x[4]; output z[4] | 502 | // Input x[4]; output z[4] |
380 | extern void bignum_mod_p256k1_4 (uint64_t z[static 4], uint64_t x[static 4]); | 503 | extern void bignum_mod_p256k1_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
381 | 504 | ||
382 | // Reduce modulo field characteristic, z := x mod p_384 | 505 | // Reduce modulo field characteristic, z := x mod p_384 |
383 | // Input x[k]; output z[6] | 506 | // Input x[k]; output z[6] |
384 | extern void bignum_mod_p384 (uint64_t z[static 6], uint64_t k, uint64_t *x); | 507 | extern void bignum_mod_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x); |
385 | extern void bignum_mod_p384_alt (uint64_t z[static 6], uint64_t k, uint64_t *x); | 508 | extern void bignum_mod_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x); |
386 | 509 | ||
387 | // Reduce modulo field characteristic, z := x mod p_384 | 510 | // Reduce modulo field characteristic, z := x mod p_384 |
388 | // Input x[6]; output z[6] | 511 | // Input x[6]; output z[6] |
389 | extern void bignum_mod_p384_6 (uint64_t z[static 6], uint64_t x[static 6]); | 512 | extern void bignum_mod_p384_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
390 | 513 | ||
391 | // Reduce modulo field characteristic, z := x mod p_521 | 514 | // Reduce modulo field characteristic, z := x mod p_521 |
392 | // Input x[9]; output z[9] | 515 | // Input x[9]; output z[9] |
393 | extern void bignum_mod_p521_9 (uint64_t z[static 9], uint64_t x[static 9]); | 516 | extern void bignum_mod_p521_9 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
517 | |||
518 | // Reduce modulo field characteristic, z := x mod p_sm2 | ||
519 | // Input x[k]; output z[4] | ||
520 | extern void bignum_mod_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); | ||
521 | |||
522 | // Reduce modulo field characteristic, z := x mod p_sm2 | ||
523 | // Input x[4]; output z[4] | ||
524 | extern void bignum_mod_sm2_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
394 | 525 | ||
395 | // Add modulo m, z := (x + y) mod m, assuming x and y reduced | 526 | // Add modulo m, z := (x + y) mod m, assuming x and y reduced |
396 | // Inputs x[k], y[k], m[k]; output z[k] | 527 | // Inputs x[k], y[k], m[k]; output z[k] |
397 | extern void bignum_modadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); | 528 | extern void bignum_modadd (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m); |
398 | 529 | ||
399 | // Double modulo m, z := (2 * x) mod m, assuming x reduced | 530 | // Double modulo m, z := (2 * x) mod m, assuming x reduced |
400 | // Inputs x[k], m[k]; output z[k] | 531 | // Inputs x[k], m[k]; output z[k] |
401 | extern void bignum_moddouble (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); | 532 | extern void bignum_moddouble (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m); |
533 | |||
534 | // Modular exponentiation for arbitrary odd modulus, z := (a^p) mod m | ||
535 | // Inputs a[k], p[k], m[k]; output z[k], temporary buffer t[>=3*k] | ||
536 | extern void bignum_modexp(uint64_t k,uint64_t *z, const uint64_t *a,const uint64_t *p,const uint64_t *m,uint64_t *t); | ||
402 | 537 | ||
403 | // Compute "modification" constant z := 2^{64k} mod m | 538 | // Compute "modification" constant z := 2^{64k} mod m |
404 | // Input m[k]; output z[k]; temporary buffer t[>=k] | 539 | // Input m[k]; output z[k]; temporary buffer t[>=k] |
405 | extern void bignum_modifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t); | 540 | extern void bignum_modifier (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t *t); |
406 | 541 | ||
407 | // Invert modulo b, z := (1/a) mod b, assuming b is an odd number > 1, a coprime to b | 542 | // Invert modulo b, z := (1/a) mod b, assuming b is an odd number > 1, a coprime to b |
408 | // Inputs a[k], b[k]; output z[k]; temporary buffer t[>=3*k] | 543 | // Inputs a[k], b[k]; output z[k]; temporary buffer t[>=3*k] |
409 | extern void bignum_modinv (uint64_t k, uint64_t *z, uint64_t *a, uint64_t *b, uint64_t *t); | 544 | extern void bignum_modinv (uint64_t k, uint64_t *z, const uint64_t *a, const uint64_t *b, uint64_t *t); |
410 | 545 | ||
411 | // Optionally negate modulo m, z := (-x) mod m (if p nonzero) or z := x (if p zero), assuming x reduced | 546 | // Optionally negate modulo m, z := (-x) mod m (if p nonzero) or z := x (if p zero), assuming x reduced |
412 | // Inputs p, x[k], m[k]; output z[k] | 547 | // Inputs p, x[k], m[k]; output z[k] |
413 | extern void bignum_modoptneg (uint64_t k, uint64_t *z, uint64_t p, uint64_t *x, uint64_t *m); | 548 | extern void bignum_modoptneg (uint64_t k, uint64_t *z, uint64_t p, const uint64_t *x, const uint64_t *m); |
414 | 549 | ||
415 | // Subtract modulo m, z := (x - y) mod m, assuming x and y reduced | 550 | // Subtract modulo m, z := (x - y) mod m, assuming x and y reduced |
416 | // Inputs x[k], y[k], m[k]; output z[k] | 551 | // Inputs x[k], y[k], m[k]; output z[k] |
417 | extern void bignum_modsub (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); | 552 | extern void bignum_modsub (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m); |
418 | 553 | ||
419 | // Compute "montification" constant z := 2^{128k} mod m | 554 | // Compute "montification" constant z := 2^{128k} mod m |
420 | // Input m[k]; output z[k]; temporary buffer t[>=k] | 555 | // Input m[k]; output z[k]; temporary buffer t[>=k] |
421 | extern void bignum_montifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t); | 556 | extern void bignum_montifier (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t *t); |
557 | |||
558 | // Montgomery inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1 | ||
559 | // Input x[4]; output z[4] | ||
560 | extern void bignum_montinv_p256(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
561 | |||
562 | // Montgomery inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1 | ||
563 | // Input x[6]; output z[6] | ||
564 | extern void bignum_montinv_p384(uint64_t z[S2N_BIGNUM_STATIC 6],const uint64_t x[S2N_BIGNUM_STATIC 6]); | ||
565 | |||
566 | // Montgomery inverse modulo p_sm2 = 2^256 - 2^224 - 2^96 + 2^64 - 1 | ||
567 | // Input x[4]; output z[4] | ||
568 | extern void bignum_montinv_sm2(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
422 | 569 | ||
423 | // Montgomery multiply, z := (x * y / 2^{64k}) mod m | 570 | // Montgomery multiply, z := (x * y / 2^{64k}) mod m |
424 | // Inputs x[k], y[k], m[k]; output z[k] | 571 | // Inputs x[k], y[k], m[k]; output z[k] |
425 | extern void bignum_montmul (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); | 572 | extern void bignum_montmul (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m); |
426 | 573 | ||
427 | // Montgomery multiply, z := (x * y / 2^256) mod p_256 | 574 | // Montgomery multiply, z := (x * y / 2^256) mod p_256 |
428 | // Inputs x[4], y[4]; output z[4] | 575 | // Inputs x[4], y[4]; output z[4] |
429 | extern void bignum_montmul_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 576 | extern void bignum_montmul_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
430 | extern void bignum_montmul_p256_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 577 | extern void bignum_montmul_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
431 | 578 | ||
432 | // Montgomery multiply, z := (x * y / 2^256) mod p_256k1 | 579 | // Montgomery multiply, z := (x * y / 2^256) mod p_256k1 |
433 | // Inputs x[4], y[4]; output z[4] | 580 | // Inputs x[4], y[4]; output z[4] |
434 | extern void bignum_montmul_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 581 | extern void bignum_montmul_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
435 | extern void bignum_montmul_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 582 | extern void bignum_montmul_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
436 | 583 | ||
437 | // Montgomery multiply, z := (x * y / 2^384) mod p_384 | 584 | // Montgomery multiply, z := (x * y / 2^384) mod p_384 |
438 | // Inputs x[6], y[6]; output z[6] | 585 | // Inputs x[6], y[6]; output z[6] |
439 | extern void bignum_montmul_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); | 586 | extern void bignum_montmul_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
440 | extern void bignum_montmul_p384_alt (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); | 587 | extern void bignum_montmul_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
441 | 588 | ||
442 | // Montgomery multiply, z := (x * y / 2^576) mod p_521 | 589 | // Montgomery multiply, z := (x * y / 2^576) mod p_521 |
443 | // Inputs x[9], y[9]; output z[9] | 590 | // Inputs x[9], y[9]; output z[9] |
444 | extern void bignum_montmul_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 591 | extern void bignum_montmul_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
445 | extern void bignum_montmul_p521_alt (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 592 | extern void bignum_montmul_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
593 | |||
594 | // Montgomery multiply, z := (x * y / 2^256) mod p_sm2 | ||
595 | // Inputs x[4], y[4]; output z[4] | ||
596 | extern void bignum_montmul_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); | ||
597 | extern void bignum_montmul_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); | ||
446 | 598 | ||
447 | // Montgomery reduce, z := (x' / 2^{64p}) MOD m | 599 | // Montgomery reduce, z := (x' / 2^{64p}) MOD m |
448 | // Inputs x[n], m[k], p; output z[k] | 600 | // Inputs x[n], m[k], p; output z[k] |
449 | extern void bignum_montredc (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t *m, uint64_t p); | 601 | extern void bignum_montredc (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, const uint64_t *m, uint64_t p); |
450 | 602 | ||
451 | // Montgomery square, z := (x^2 / 2^{64k}) mod m | 603 | // Montgomery square, z := (x^2 / 2^{64k}) mod m |
452 | // Inputs x[k], m[k]; output z[k] | 604 | // Inputs x[k], m[k]; output z[k] |
453 | extern void bignum_montsqr (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); | 605 | extern void bignum_montsqr (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m); |
454 | 606 | ||
455 | // Montgomery square, z := (x^2 / 2^256) mod p_256 | 607 | // Montgomery square, z := (x^2 / 2^256) mod p_256 |
456 | // Input x[4]; output z[4] | 608 | // Input x[4]; output z[4] |
457 | extern void bignum_montsqr_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 609 | extern void bignum_montsqr_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
458 | extern void bignum_montsqr_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); | 610 | extern void bignum_montsqr_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
459 | 611 | ||
460 | // Montgomery square, z := (x^2 / 2^256) mod p_256k1 | 612 | // Montgomery square, z := (x^2 / 2^256) mod p_256k1 |
461 | // Input x[4]; output z[4] | 613 | // Input x[4]; output z[4] |
462 | extern void bignum_montsqr_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 614 | extern void bignum_montsqr_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
463 | extern void bignum_montsqr_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); | 615 | extern void bignum_montsqr_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
464 | 616 | ||
465 | // Montgomery square, z := (x^2 / 2^384) mod p_384 | 617 | // Montgomery square, z := (x^2 / 2^384) mod p_384 |
466 | // Input x[6]; output z[6] | 618 | // Input x[6]; output z[6] |
467 | extern void bignum_montsqr_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 619 | extern void bignum_montsqr_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
468 | extern void bignum_montsqr_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); | 620 | extern void bignum_montsqr_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
469 | 621 | ||
470 | // Montgomery square, z := (x^2 / 2^576) mod p_521 | 622 | // Montgomery square, z := (x^2 / 2^576) mod p_521 |
471 | // Input x[9]; output z[9] | 623 | // Input x[9]; output z[9] |
472 | extern void bignum_montsqr_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 624 | extern void bignum_montsqr_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
473 | extern void bignum_montsqr_p521_alt (uint64_t z[static 9], uint64_t x[static 9]); | 625 | extern void bignum_montsqr_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
626 | |||
627 | // Montgomery square, z := (x^2 / 2^256) mod p_sm2 | ||
628 | // Input x[4]; output z[4] | ||
629 | extern void bignum_montsqr_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
630 | extern void bignum_montsqr_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
474 | 631 | ||
475 | // Multiply z := x * y | 632 | // Multiply z := x * y |
476 | // Inputs x[m], y[n]; output z[k] | 633 | // Inputs x[m], y[n]; output z[k] |
477 | extern void bignum_mul (uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 634 | extern void bignum_mul (uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
478 | 635 | ||
479 | // Multiply z := x * y | 636 | // Multiply z := x * y |
480 | // Inputs x[4], y[4]; output z[8] | 637 | // Inputs x[4], y[4]; output z[8] |
481 | extern void bignum_mul_4_8 (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); | 638 | extern void bignum_mul_4_8 (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
482 | extern void bignum_mul_4_8_alt (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); | 639 | extern void bignum_mul_4_8_alt (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
483 | 640 | ||
484 | // Multiply z := x * y | 641 | // Multiply z := x * y |
485 | // Inputs x[6], y[6]; output z[12] | 642 | // Inputs x[6], y[6]; output z[12] |
486 | extern void bignum_mul_6_12 (uint64_t z[static 12], uint64_t x[static 6], uint64_t y[static 6]); | 643 | extern void bignum_mul_6_12 (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
487 | extern void bignum_mul_6_12_alt (uint64_t z[static 12], uint64_t x[static 6], uint64_t y[static 6]); | 644 | extern void bignum_mul_6_12_alt (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
488 | 645 | ||
489 | // Multiply z := x * y | 646 | // Multiply z := x * y |
490 | // Inputs x[8], y[8]; output z[16] | 647 | // Inputs x[8], y[8]; output z[16] |
491 | extern void bignum_mul_8_16 (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); | 648 | extern void bignum_mul_8_16 (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8], const uint64_t y[S2N_BIGNUM_STATIC 8]); |
492 | extern void bignum_mul_8_16_alt (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); | 649 | extern void bignum_mul_8_16_alt (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8], const uint64_t y[S2N_BIGNUM_STATIC 8]); |
493 | 650 | ||
494 | // Multiply modulo p_25519, z := (x * y) mod p_25519 | 651 | // Multiply modulo p_25519, z := (x * y) mod p_25519 |
495 | // Inputs x[4], y[4]; output z[4] | 652 | // Inputs x[4], y[4]; output z[4] |
496 | extern void bignum_mul_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 653 | extern void bignum_mul_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
497 | extern void bignum_mul_p25519_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 654 | extern void bignum_mul_p25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
498 | 655 | ||
499 | // Multiply modulo p_256k1, z := (x * y) mod p_256k1 | 656 | // Multiply modulo p_256k1, z := (x * y) mod p_256k1 |
500 | // Inputs x[4], y[4]; output z[4] | 657 | // Inputs x[4], y[4]; output z[4] |
501 | extern void bignum_mul_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 658 | extern void bignum_mul_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
502 | extern void bignum_mul_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 659 | extern void bignum_mul_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
503 | 660 | ||
504 | // Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced | 661 | // Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced |
505 | // Inputs x[9], y[9]; output z[9] | 662 | // Inputs x[9], y[9]; output z[9] |
506 | extern void bignum_mul_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 663 | extern void bignum_mul_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
507 | extern void bignum_mul_p521_alt (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 664 | extern void bignum_mul_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
508 | 665 | ||
509 | // Multiply bignum by 10 and add word: z := 10 * z + d | 666 | // Multiply bignum by 10 and add word: z := 10 * z + d |
510 | // Inputs z[k], d; outputs function return (carry) and z[k] | 667 | // Inputs z[k], d; outputs function return (carry) and z[k] |
@@ -512,55 +669,59 @@ extern uint64_t bignum_muladd10 (uint64_t k, uint64_t *z, uint64_t d); | |||
512 | 669 | ||
513 | // Multiplex/select z := x (if p nonzero) or z := y (if p zero) | 670 | // Multiplex/select z := x (if p nonzero) or z := y (if p zero) |
514 | // Inputs p, x[k], y[k]; output z[k] | 671 | // Inputs p, x[k], y[k]; output z[k] |
515 | extern void bignum_mux (uint64_t p, uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y); | 672 | extern void bignum_mux (uint64_t p, uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y); |
516 | 673 | ||
517 | // 256-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) | 674 | // 256-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) |
518 | // Inputs p, x[4], y[4]; output z[4] | 675 | // Inputs p, x[4], y[4]; output z[4] |
519 | extern void bignum_mux_4 (uint64_t p, uint64_t z[static 4],uint64_t x[static 4], uint64_t y[static 4]); | 676 | extern void bignum_mux_4 (uint64_t p, uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
520 | 677 | ||
521 | // 384-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) | 678 | // 384-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) |
522 | // Inputs p, x[6], y[6]; output z[6] | 679 | // Inputs p, x[6], y[6]; output z[6] |
523 | extern void bignum_mux_6 (uint64_t p, uint64_t z[static 6],uint64_t x[static 6], uint64_t y[static 6]); | 680 | extern void bignum_mux_6 (uint64_t p, uint64_t z[S2N_BIGNUM_STATIC 6],const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
524 | 681 | ||
525 | // Select element from 16-element table, z := xs[k*i] | 682 | // Select element from 16-element table, z := xs[k*i] |
526 | // Inputs xs[16*k], i; output z[k] | 683 | // Inputs xs[16*k], i; output z[k] |
527 | extern void bignum_mux16 (uint64_t k, uint64_t *z, uint64_t *xs, uint64_t i); | 684 | extern void bignum_mux16 (uint64_t k, uint64_t *z, const uint64_t *xs, uint64_t i); |
528 | 685 | ||
529 | // Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced | 686 | // Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced |
530 | // Input x[4]; output z[4] | 687 | // Input x[4]; output z[4] |
531 | extern void bignum_neg_p25519 (uint64_t z[static 4], uint64_t x[static 4]); | 688 | extern void bignum_neg_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
532 | 689 | ||
533 | // Negate modulo p_256, z := (-x) mod p_256, assuming x reduced | 690 | // Negate modulo p_256, z := (-x) mod p_256, assuming x reduced |
534 | // Input x[4]; output z[4] | 691 | // Input x[4]; output z[4] |
535 | extern void bignum_neg_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 692 | extern void bignum_neg_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
536 | 693 | ||
537 | // Negate modulo p_256k1, z := (-x) mod p_256k1, assuming x reduced | 694 | // Negate modulo p_256k1, z := (-x) mod p_256k1, assuming x reduced |
538 | // Input x[4]; output z[4] | 695 | // Input x[4]; output z[4] |
539 | extern void bignum_neg_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 696 | extern void bignum_neg_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
540 | 697 | ||
541 | // Negate modulo p_384, z := (-x) mod p_384, assuming x reduced | 698 | // Negate modulo p_384, z := (-x) mod p_384, assuming x reduced |
542 | // Input x[6]; output z[6] | 699 | // Input x[6]; output z[6] |
543 | extern void bignum_neg_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 700 | extern void bignum_neg_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
544 | 701 | ||
545 | // Negate modulo p_521, z := (-x) mod p_521, assuming x reduced | 702 | // Negate modulo p_521, z := (-x) mod p_521, assuming x reduced |
546 | // Input x[9]; output z[9] | 703 | // Input x[9]; output z[9] |
547 | extern void bignum_neg_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 704 | extern void bignum_neg_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
705 | |||
706 | // Negate modulo p_sm2, z := (-x) mod p_sm2, assuming x reduced | ||
707 | // Input x[4]; output z[4] | ||
708 | extern void bignum_neg_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
548 | 709 | ||
549 | // Negated modular inverse, z := (-1/x) mod 2^{64k} | 710 | // Negated modular inverse, z := (-1/x) mod 2^{64k} |
550 | // Input x[k]; output z[k] | 711 | // Input x[k]; output z[k] |
551 | extern void bignum_negmodinv (uint64_t k, uint64_t *z, uint64_t *x); | 712 | extern void bignum_negmodinv (uint64_t k, uint64_t *z, const uint64_t *x); |
552 | 713 | ||
553 | // Test bignum for nonzero-ness x =/= 0 | 714 | // Test bignum for nonzero-ness x =/= 0 |
554 | // Input x[k]; output function return | 715 | // Input x[k]; output function return |
555 | extern uint64_t bignum_nonzero (uint64_t k, uint64_t *x); | 716 | extern uint64_t bignum_nonzero (uint64_t k, const uint64_t *x); |
556 | 717 | ||
557 | // Test 256-bit bignum for nonzero-ness x =/= 0 | 718 | // Test 256-bit bignum for nonzero-ness x =/= 0 |
558 | // Input x[4]; output function return | 719 | // Input x[4]; output function return |
559 | extern uint64_t bignum_nonzero_4(uint64_t x[static 4]); | 720 | extern uint64_t bignum_nonzero_4(const uint64_t x[S2N_BIGNUM_STATIC 4]); |
560 | 721 | ||
561 | // Test 384-bit bignum for nonzero-ness x =/= 0 | 722 | // Test 384-bit bignum for nonzero-ness x =/= 0 |
562 | // Input x[6]; output function return | 723 | // Input x[6]; output function return |
563 | extern uint64_t bignum_nonzero_6(uint64_t x[static 6]); | 724 | extern uint64_t bignum_nonzero_6(const uint64_t x[S2N_BIGNUM_STATIC 6]); |
564 | 725 | ||
565 | // Normalize bignum in-place by shifting left till top bit is 1 | 726 | // Normalize bignum in-place by shifting left till top bit is 1 |
566 | // Input z[k]; outputs function return (bits shifted left) and z[k] | 727 | // Input z[k]; outputs function return (bits shifted left) and z[k] |
@@ -568,7 +729,7 @@ extern uint64_t bignum_normalize (uint64_t k, uint64_t *z); | |||
568 | 729 | ||
569 | // Test bignum for odd-ness | 730 | // Test bignum for odd-ness |
570 | // Input x[k]; output function return | 731 | // Input x[k]; output function return |
571 | extern uint64_t bignum_odd (uint64_t k, uint64_t *x); | 732 | extern uint64_t bignum_odd (uint64_t k, const uint64_t *x); |
572 | 733 | ||
573 | // Convert single digit to bignum, z := n | 734 | // Convert single digit to bignum, z := n |
574 | // Input n; output z[k] | 735 | // Input n; output z[k] |
@@ -576,39 +737,43 @@ extern void bignum_of_word (uint64_t k, uint64_t *z, uint64_t n); | |||
576 | 737 | ||
577 | // Optionally add, z := x + y (if p nonzero) or z := x (if p zero) | 738 | // Optionally add, z := x + y (if p nonzero) or z := x (if p zero) |
578 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] | 739 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] |
579 | extern uint64_t bignum_optadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y); | 740 | extern uint64_t bignum_optadd (uint64_t k, uint64_t *z, const uint64_t *x, uint64_t p, const uint64_t *y); |
580 | 741 | ||
581 | // Optionally negate, z := -x (if p nonzero) or z := x (if p zero) | 742 | // Optionally negate, z := -x (if p nonzero) or z := x (if p zero) |
582 | // Inputs p, x[k]; outputs function return (nonzero input) and z[k] | 743 | // Inputs p, x[k]; outputs function return (nonzero input) and z[k] |
583 | extern uint64_t bignum_optneg (uint64_t k, uint64_t *z, uint64_t p, uint64_t *x); | 744 | extern uint64_t bignum_optneg (uint64_t k, uint64_t *z, uint64_t p, const uint64_t *x); |
584 | 745 | ||
585 | // Optionally negate modulo p_25519, z := (-x) mod p_25519 (if p nonzero) or z := x (if p zero), assuming x reduced | 746 | // Optionally negate modulo p_25519, z := (-x) mod p_25519 (if p nonzero) or z := x (if p zero), assuming x reduced |
586 | // Inputs p, x[4]; output z[4] | 747 | // Inputs p, x[4]; output z[4] |
587 | extern void bignum_optneg_p25519 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]); | 748 | extern void bignum_optneg_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
588 | 749 | ||
589 | // Optionally negate modulo p_256, z := (-x) mod p_256 (if p nonzero) or z := x (if p zero), assuming x reduced | 750 | // Optionally negate modulo p_256, z := (-x) mod p_256 (if p nonzero) or z := x (if p zero), assuming x reduced |
590 | // Inputs p, x[4]; output z[4] | 751 | // Inputs p, x[4]; output z[4] |
591 | extern void bignum_optneg_p256 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]); | 752 | extern void bignum_optneg_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
592 | 753 | ||
593 | // Optionally negate modulo p_256k1, z := (-x) mod p_256k1 (if p nonzero) or z := x (if p zero), assuming x reduced | 754 | // Optionally negate modulo p_256k1, z := (-x) mod p_256k1 (if p nonzero) or z := x (if p zero), assuming x reduced |
594 | // Inputs p, x[4]; output z[4] | 755 | // Inputs p, x[4]; output z[4] |
595 | extern void bignum_optneg_p256k1 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]); | 756 | extern void bignum_optneg_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
596 | 757 | ||
597 | // Optionally negate modulo p_384, z := (-x) mod p_384 (if p nonzero) or z := x (if p zero), assuming x reduced | 758 | // Optionally negate modulo p_384, z := (-x) mod p_384 (if p nonzero) or z := x (if p zero), assuming x reduced |
598 | // Inputs p, x[6]; output z[6] | 759 | // Inputs p, x[6]; output z[6] |
599 | extern void bignum_optneg_p384 (uint64_t z[static 6], uint64_t p, uint64_t x[static 6]); | 760 | extern void bignum_optneg_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 6]); |
600 | 761 | ||
601 | // Optionally negate modulo p_521, z := (-x) mod p_521 (if p nonzero) or z := x (if p zero), assuming x reduced | 762 | // Optionally negate modulo p_521, z := (-x) mod p_521 (if p nonzero) or z := x (if p zero), assuming x reduced |
602 | // Inputs p, x[9]; output z[9] | 763 | // Inputs p, x[9]; output z[9] |
603 | extern void bignum_optneg_p521 (uint64_t z[static 9], uint64_t p, uint64_t x[static 9]); | 764 | extern void bignum_optneg_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 9]); |
765 | |||
766 | // Optionally negate modulo p_sm2, z := (-x) mod p_sm2 (if p nonzero) or z := x (if p zero), assuming x reduced | ||
767 | // Inputs p, x[4]; output z[4] | ||
768 | extern void bignum_optneg_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
604 | 769 | ||
605 | // Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero) | 770 | // Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero) |
606 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] | 771 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] |
607 | extern uint64_t bignum_optsub (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y); | 772 | extern uint64_t bignum_optsub (uint64_t k, uint64_t *z, const uint64_t *x, uint64_t p, const uint64_t *y); |
608 | 773 | ||
609 | // Optionally subtract or add, z := x + sgn(p) * y interpreting p as signed | 774 | // Optionally subtract or add, z := x + sgn(p) * y interpreting p as signed |
610 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] | 775 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] |
611 | extern uint64_t bignum_optsubadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y); | 776 | extern uint64_t bignum_optsubadd (uint64_t k, uint64_t *z, const uint64_t *x, uint64_t p, const uint64_t *y); |
612 | 777 | ||
613 | // Return bignum of power of 2, z := 2^n | 778 | // Return bignum of power of 2, z := 2^n |
614 | // Input n; output z[k] | 779 | // Input n; output z[k] |
@@ -616,216 +781,376 @@ extern void bignum_pow2 (uint64_t k, uint64_t *z, uint64_t n); | |||
616 | 781 | ||
617 | // Shift bignum left by c < 64 bits z := x * 2^c | 782 | // Shift bignum left by c < 64 bits z := x * 2^c |
618 | // Inputs x[n], c; outputs function return (carry-out) and z[k] | 783 | // Inputs x[n], c; outputs function return (carry-out) and z[k] |
619 | extern uint64_t bignum_shl_small (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t c); | 784 | extern uint64_t bignum_shl_small (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t c); |
620 | 785 | ||
621 | // Shift bignum right by c < 64 bits z := floor(x / 2^c) | 786 | // Shift bignum right by c < 64 bits z := floor(x / 2^c) |
622 | // Inputs x[n], c; outputs function return (bits shifted out) and z[k] | 787 | // Inputs x[n], c; outputs function return (bits shifted out) and z[k] |
623 | extern uint64_t bignum_shr_small (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t c); | 788 | extern uint64_t bignum_shr_small (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t c); |
624 | 789 | ||
625 | // Square, z := x^2 | 790 | // Square, z := x^2 |
626 | // Input x[n]; output z[k] | 791 | // Input x[n]; output z[k] |
627 | extern void bignum_sqr (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); | 792 | extern void bignum_sqr (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x); |
628 | 793 | ||
629 | // Square, z := x^2 | 794 | // Square, z := x^2 |
630 | // Input x[4]; output z[8] | 795 | // Input x[4]; output z[8] |
631 | extern void bignum_sqr_4_8 (uint64_t z[static 8], uint64_t x[static 4]); | 796 | extern void bignum_sqr_4_8 (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
632 | extern void bignum_sqr_4_8_alt (uint64_t z[static 8], uint64_t x[static 4]); | 797 | extern void bignum_sqr_4_8_alt (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
633 | 798 | ||
634 | // Square, z := x^2 | 799 | // Square, z := x^2 |
635 | // Input x[6]; output z[12] | 800 | // Input x[6]; output z[12] |
636 | extern void bignum_sqr_6_12 (uint64_t z[static 12], uint64_t x[static 6]); | 801 | extern void bignum_sqr_6_12 (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
637 | extern void bignum_sqr_6_12_alt (uint64_t z[static 12], uint64_t x[static 6]); | 802 | extern void bignum_sqr_6_12_alt (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
638 | 803 | ||
639 | // Square, z := x^2 | 804 | // Square, z := x^2 |
640 | // Input x[8]; output z[16] | 805 | // Input x[8]; output z[16] |
641 | extern void bignum_sqr_8_16 (uint64_t z[static 16], uint64_t x[static 8]); | 806 | extern void bignum_sqr_8_16 (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8]); |
642 | extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); | 807 | extern void bignum_sqr_8_16_alt (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8]); |
643 | 808 | ||
644 | // Square modulo p_25519, z := (x^2) mod p_25519 | 809 | // Square modulo p_25519, z := (x^2) mod p_25519 |
645 | // Input x[4]; output z[4] | 810 | // Input x[4]; output z[4] |
646 | extern void bignum_sqr_p25519 (uint64_t z[static 4], uint64_t x[static 4]); | 811 | extern void bignum_sqr_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
647 | extern void bignum_sqr_p25519_alt (uint64_t z[static 4], uint64_t x[static 4]); | 812 | extern void bignum_sqr_p25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
648 | 813 | ||
649 | // Square modulo p_256k1, z := (x^2) mod p_256k1 | 814 | // Square modulo p_256k1, z := (x^2) mod p_256k1 |
650 | // Input x[4]; output z[4] | 815 | // Input x[4]; output z[4] |
651 | extern void bignum_sqr_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 816 | extern void bignum_sqr_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
652 | extern void bignum_sqr_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); | 817 | extern void bignum_sqr_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
653 | 818 | ||
654 | // Square modulo p_521, z := (x^2) mod p_521, assuming x reduced | 819 | // Square modulo p_521, z := (x^2) mod p_521, assuming x reduced |
655 | // Input x[9]; output z[9] | 820 | // Input x[9]; output z[9] |
656 | extern void bignum_sqr_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 821 | extern void bignum_sqr_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
657 | extern void bignum_sqr_p521_alt (uint64_t z[static 9], uint64_t x[static 9]); | 822 | extern void bignum_sqr_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
823 | |||
824 | // Square root modulo p_25519 | ||
825 | // Input x[4]; output function return (Legendre symbol) and z[4] | ||
826 | extern int64_t bignum_sqrt_p25519(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
827 | extern int64_t bignum_sqrt_p25519_alt(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
658 | 828 | ||
659 | // Subtract, z := x - y | 829 | // Subtract, z := x - y |
660 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 830 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
661 | extern uint64_t bignum_sub (uint64_t p, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 831 | extern uint64_t bignum_sub (uint64_t p, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
662 | 832 | ||
663 | // Subtract modulo p_25519, z := (x - y) mod p_25519, assuming x and y reduced | 833 | // Subtract modulo p_25519, z := (x - y) mod p_25519, assuming x and y reduced |
664 | // Inputs x[4], y[4]; output z[4] | 834 | // Inputs x[4], y[4]; output z[4] |
665 | extern void bignum_sub_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 835 | extern void bignum_sub_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
666 | 836 | ||
667 | // Subtract modulo p_256, z := (x - y) mod p_256, assuming x and y reduced | 837 | // Subtract modulo p_256, z := (x - y) mod p_256, assuming x and y reduced |
668 | // Inputs x[4], y[4]; output z[4] | 838 | // Inputs x[4], y[4]; output z[4] |
669 | extern void bignum_sub_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 839 | extern void bignum_sub_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
670 | 840 | ||
671 | // Subtract modulo p_256k1, z := (x - y) mod p_256k1, assuming x and y reduced | 841 | // Subtract modulo p_256k1, z := (x - y) mod p_256k1, assuming x and y reduced |
672 | // Inputs x[4], y[4]; output z[4] | 842 | // Inputs x[4], y[4]; output z[4] |
673 | extern void bignum_sub_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 843 | extern void bignum_sub_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
674 | 844 | ||
675 | // Subtract modulo p_384, z := (x - y) mod p_384, assuming x and y reduced | 845 | // Subtract modulo p_384, z := (x - y) mod p_384, assuming x and y reduced |
676 | // Inputs x[6], y[6]; output z[6] | 846 | // Inputs x[6], y[6]; output z[6] |
677 | extern void bignum_sub_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); | 847 | extern void bignum_sub_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
678 | 848 | ||
679 | // Subtract modulo p_521, z := (x - y) mod p_521, assuming x and y reduced | 849 | // Subtract modulo p_521, z := (x - y) mod p_521, assuming x and y reduced |
680 | // Inputs x[9], y[9]; output z[9] | 850 | // Inputs x[9], y[9]; output z[9] |
681 | extern void bignum_sub_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 851 | extern void bignum_sub_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
852 | |||
853 | // Subtract modulo p_sm2, z := (x - y) mod p_sm2, assuming x and y reduced | ||
854 | // Inputs x[4], y[4]; output z[4] | ||
855 | extern void bignum_sub_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); | ||
682 | 856 | ||
683 | // Convert 4-digit (256-bit) bignum to big-endian bytes | 857 | // Convert 4-digit (256-bit) bignum to big-endian bytes |
684 | // Input x[4]; output z[32] (bytes) | 858 | // Input x[4]; output z[32] (bytes) |
685 | extern void bignum_tobebytes_4 (uint8_t z[static 32], uint64_t x[static 4]); | 859 | extern void bignum_tobebytes_4 (uint8_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
686 | 860 | ||
687 | // Convert 6-digit (384-bit) bignum to big-endian bytes | 861 | // Convert 6-digit (384-bit) bignum to big-endian bytes |
688 | // Input x[6]; output z[48] (bytes) | 862 | // Input x[6]; output z[48] (bytes) |
689 | extern void bignum_tobebytes_6 (uint8_t z[static 48], uint64_t x[static 6]); | 863 | extern void bignum_tobebytes_6 (uint8_t z[S2N_BIGNUM_STATIC 48], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
690 | 864 | ||
691 | // Convert 4-digit (256-bit) bignum to little-endian bytes | 865 | // Convert 4-digit (256-bit) bignum to little-endian bytes |
692 | // Input x[4]; output z[32] (bytes) | 866 | // Input x[4]; output z[32] (bytes) |
693 | extern void bignum_tolebytes_4 (uint8_t z[static 32], uint64_t x[static 4]); | 867 | extern void bignum_tolebytes_4 (uint8_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
694 | 868 | ||
695 | // Convert 6-digit (384-bit) bignum to little-endian bytes | 869 | // Convert 6-digit (384-bit) bignum to little-endian bytes |
696 | // Input x[6]; output z[48] (bytes) | 870 | // Input x[6]; output z[48] (bytes) |
697 | extern void bignum_tolebytes_6 (uint8_t z[static 48], uint64_t x[static 6]); | 871 | extern void bignum_tolebytes_6 (uint8_t z[S2N_BIGNUM_STATIC 48], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
698 | 872 | ||
699 | // Convert 9-digit 528-bit bignum to little-endian bytes | 873 | // Convert 9-digit 528-bit bignum to little-endian bytes |
700 | // Input x[9]; output z[66] (bytes) | 874 | // Input x[9]; output z[66] (bytes) |
701 | extern void bignum_tolebytes_p521 (uint8_t z[static 66], uint64_t x[static 9]); | 875 | extern void bignum_tolebytes_p521 (uint8_t z[S2N_BIGNUM_STATIC 66], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
702 | 876 | ||
703 | // Convert to Montgomery form z := (2^256 * x) mod p_256 | 877 | // Convert to Montgomery form z := (2^256 * x) mod p_256 |
704 | // Input x[4]; output z[4] | 878 | // Input x[4]; output z[4] |
705 | extern void bignum_tomont_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 879 | extern void bignum_tomont_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
706 | extern void bignum_tomont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); | 880 | extern void bignum_tomont_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
707 | 881 | ||
708 | // Convert to Montgomery form z := (2^256 * x) mod p_256k1 | 882 | // Convert to Montgomery form z := (2^256 * x) mod p_256k1 |
709 | // Input x[4]; output z[4] | 883 | // Input x[4]; output z[4] |
710 | extern void bignum_tomont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 884 | extern void bignum_tomont_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
711 | extern void bignum_tomont_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); | 885 | extern void bignum_tomont_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
712 | 886 | ||
713 | // Convert to Montgomery form z := (2^384 * x) mod p_384 | 887 | // Convert to Montgomery form z := (2^384 * x) mod p_384 |
714 | // Input x[6]; output z[6] | 888 | // Input x[6]; output z[6] |
715 | extern void bignum_tomont_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 889 | extern void bignum_tomont_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
716 | extern void bignum_tomont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); | 890 | extern void bignum_tomont_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
717 | 891 | ||
718 | // Convert to Montgomery form z := (2^576 * x) mod p_521 | 892 | // Convert to Montgomery form z := (2^576 * x) mod p_521 |
719 | // Input x[9]; output z[9] | 893 | // Input x[9]; output z[9] |
720 | extern void bignum_tomont_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 894 | extern void bignum_tomont_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
895 | |||
896 | // Convert to Montgomery form z := (2^256 * x) mod p_sm2 | ||
897 | // Input x[4]; output z[4] | ||
898 | extern void bignum_tomont_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
721 | 899 | ||
722 | // Triple modulo p_256, z := (3 * x) mod p_256 | 900 | // Triple modulo p_256, z := (3 * x) mod p_256 |
723 | // Input x[4]; output z[4] | 901 | // Input x[4]; output z[4] |
724 | extern void bignum_triple_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 902 | extern void bignum_triple_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
725 | extern void bignum_triple_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); | 903 | extern void bignum_triple_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
726 | 904 | ||
727 | // Triple modulo p_256k1, z := (3 * x) mod p_256k1 | 905 | // Triple modulo p_256k1, z := (3 * x) mod p_256k1 |
728 | // Input x[4]; output z[4] | 906 | // Input x[4]; output z[4] |
729 | extern void bignum_triple_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 907 | extern void bignum_triple_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
730 | extern void bignum_triple_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); | 908 | extern void bignum_triple_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
731 | 909 | ||
732 | // Triple modulo p_384, z := (3 * x) mod p_384 | 910 | // Triple modulo p_384, z := (3 * x) mod p_384 |
733 | // Input x[6]; output z[6] | 911 | // Input x[6]; output z[6] |
734 | extern void bignum_triple_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 912 | extern void bignum_triple_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
735 | extern void bignum_triple_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); | 913 | extern void bignum_triple_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
736 | 914 | ||
737 | // Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced | 915 | // Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced |
738 | // Input x[9]; output z[9] | 916 | // Input x[9]; output z[9] |
739 | extern void bignum_triple_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 917 | extern void bignum_triple_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
740 | extern void bignum_triple_p521_alt (uint64_t z[static 9], uint64_t x[static 9]); | 918 | extern void bignum_triple_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
919 | |||
920 | // Triple modulo p_sm2, z := (3 * x) mod p_sm2 | ||
921 | // Input x[4]; output z[4] | ||
922 | extern void bignum_triple_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
923 | extern void bignum_triple_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
741 | 924 | ||
742 | // Montgomery ladder step for curve25519 | 925 | // Montgomery ladder step for curve25519 |
743 | // Inputs point[8], pp[16], b; output rr[16] | 926 | // Inputs point[8], pp[16], b; output rr[16] |
744 | extern void curve25519_ladderstep(uint64_t rr[16],uint64_t point[8],uint64_t pp[16],uint64_t b); | 927 | extern void curve25519_ladderstep(uint64_t rr[16],const uint64_t point[8],const uint64_t pp[16],uint64_t b); |
745 | extern void curve25519_ladderstep_alt(uint64_t rr[16],uint64_t point[8],uint64_t pp[16],uint64_t b); | 928 | extern void curve25519_ladderstep_alt(uint64_t rr[16],const uint64_t point[8],const uint64_t pp[16],uint64_t b); |
746 | 929 | ||
747 | // Projective scalar multiplication, x coordinate only, for curve25519 | 930 | // Projective scalar multiplication, x coordinate only, for curve25519 |
748 | // Inputs scalar[4], point[4]; output res[8] | 931 | // Inputs scalar[4], point[4]; output res[8] |
749 | extern void curve25519_pxscalarmul(uint64_t res[static 8],uint64_t scalar[static 4],uint64_t point[static 4]); | 932 | extern void curve25519_pxscalarmul(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]); |
750 | extern void curve25519_pxscalarmul_alt(uint64_t res[static 8],uint64_t scalar[static 4],uint64_t point[static 4]); | 933 | extern void curve25519_pxscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]); |
751 | 934 | ||
752 | // x25519 function for curve25519 | 935 | // x25519 function for curve25519 |
753 | // Inputs scalar[4], point[4]; output res[4] | 936 | // Inputs scalar[4], point[4]; output res[4] |
754 | extern void curve25519_x25519(uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]); | 937 | extern void curve25519_x25519(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]); |
755 | extern void curve25519_x25519_alt(uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]); | 938 | extern void curve25519_x25519_alt(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]); |
939 | |||
940 | // x25519 function for curve25519 (byte array arguments) | ||
941 | // Inputs scalar[32] (bytes), point[32] (bytes); output res[32] (bytes) | ||
942 | extern void curve25519_x25519_byte(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32],const uint8_t point[S2N_BIGNUM_STATIC 32]); | ||
943 | extern void curve25519_x25519_byte_alt(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32],const uint8_t point[S2N_BIGNUM_STATIC 32]); | ||
756 | 944 | ||
757 | // x25519 function for curve25519 on base element 9 | 945 | // x25519 function for curve25519 on base element 9 |
758 | // Input scalar[4]; output res[4] | 946 | // Input scalar[4]; output res[4] |
759 | extern void curve25519_x25519base(uint64_t res[static 4],uint64_t scalar[static 4]); | 947 | extern void curve25519_x25519base(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4]); |
760 | extern void curve25519_x25519base_alt(uint64_t res[static 4],uint64_t scalar[static 4]); | 948 | extern void curve25519_x25519base_alt(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4]); |
949 | |||
950 | // x25519 function for curve25519 on base element 9 (byte array arguments) | ||
951 | // Input scalar[32] (bytes); output res[32] (bytes) | ||
952 | extern void curve25519_x25519base_byte(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32]); | ||
953 | extern void curve25519_x25519base_byte_alt(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32]); | ||
954 | |||
955 | // Decode compressed 256-bit form of edwards25519 point | ||
956 | // Input c[32] (bytes); output function return and z[8] | ||
957 | extern uint64_t edwards25519_decode(uint64_t z[S2N_BIGNUM_STATIC 8], const uint8_t c[S2N_BIGNUM_STATIC 32]); | ||
958 | extern uint64_t edwards25519_decode_alt(uint64_t z[S2N_BIGNUM_STATIC 8], const uint8_t c[S2N_BIGNUM_STATIC 32]); | ||
959 | |||
960 | // Encode edwards25519 point into compressed form as 256-bit number | ||
961 | // Input p[8]; output z[32] (bytes) | ||
962 | extern void edwards25519_encode(uint8_t z[S2N_BIGNUM_STATIC 32], const uint64_t p[S2N_BIGNUM_STATIC 8]); | ||
761 | 963 | ||
762 | // Extended projective addition for edwards25519 | 964 | // Extended projective addition for edwards25519 |
763 | // Inputs p1[16], p2[16]; output p3[16] | 965 | // Inputs p1[16], p2[16]; output p3[16] |
764 | extern void edwards25519_epadd(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 16]); | 966 | extern void edwards25519_epadd(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 16]); |
765 | extern void edwards25519_epadd_alt(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 16]); | 967 | extern void edwards25519_epadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 16]); |
766 | 968 | ||
767 | // Extended projective doubling for edwards25519 | 969 | // Extended projective doubling for edwards25519 |
768 | // Input p1[12]; output p3[16] | 970 | // Input p1[12]; output p3[16] |
769 | extern void edwards25519_epdouble(uint64_t p3[static 16],uint64_t p1[static 12]); | 971 | extern void edwards25519_epdouble(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
770 | extern void edwards25519_epdouble_alt(uint64_t p3[static 16],uint64_t p1[static 12]); | 972 | extern void edwards25519_epdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
771 | 973 | ||
772 | // Projective doubling for edwards25519 | 974 | // Projective doubling for edwards25519 |
773 | // Input p1[12]; output p3[12] | 975 | // Input p1[12]; output p3[12] |
774 | extern void edwards25519_pdouble(uint64_t p3[static 12],uint64_t p1[static 12]); | 976 | extern void edwards25519_pdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
775 | extern void edwards25519_pdouble_alt(uint64_t p3[static 12],uint64_t p1[static 12]); | 977 | extern void edwards25519_pdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
776 | 978 | ||
777 | // Extended projective + precomputed mixed addition for edwards25519 | 979 | // Extended projective + precomputed mixed addition for edwards25519 |
778 | // Inputs p1[16], p2[12]; output p3[16] | 980 | // Inputs p1[16], p2[12]; output p3[16] |
779 | extern void edwards25519_pepadd(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 12]); | 981 | extern void edwards25519_pepadd(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 12]); |
780 | extern void edwards25519_pepadd_alt(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 12]); | 982 | extern void edwards25519_pepadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 12]); |
983 | |||
984 | // Scalar multiplication by standard basepoint for edwards25519 (Ed25519) | ||
985 | // Input scalar[4]; output res[8] | ||
986 | extern void edwards25519_scalarmulbase(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4]); | ||
987 | extern void edwards25519_scalarmulbase_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4]); | ||
988 | |||
989 | // Double scalar multiplication for edwards25519, fresh and base point | ||
990 | // Inputs scalar[4], point[8], bscalar[4]; output res[8] | ||
991 | extern void edwards25519_scalarmuldouble(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4], const uint64_t point[S2N_BIGNUM_STATIC 8],const uint64_t bscalar[S2N_BIGNUM_STATIC 4]); | ||
992 | extern void edwards25519_scalarmuldouble_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4], const uint64_t point[S2N_BIGNUM_STATIC 8],const uint64_t bscalar[S2N_BIGNUM_STATIC 4]); | ||
993 | |||
994 | // Scalar product of 2-element polynomial vectors in NTT domain, with mulcache | ||
995 | // Inputs a[512], b[512], bt[256] (signed 16-bit words); output r[256] (signed 16-bit words) | ||
996 | extern void mlkem_basemul_k2(int16_t r[S2N_BIGNUM_STATIC 256],const int16_t a[S2N_BIGNUM_STATIC 512],const int16_t b[S2N_BIGNUM_STATIC 512],const int16_t bt[S2N_BIGNUM_STATIC 256]); | ||
997 | |||
998 | // Scalar product of 3-element polynomial vectors in NTT domain, with mulcache | ||
999 | // Inputs a[768], b[768], bt[384] (signed 16-bit words); output r[256] (signed 16-bit words) | ||
1000 | extern void mlkem_basemul_k3(int16_t r[S2N_BIGNUM_STATIC 256],const int16_t a[S2N_BIGNUM_STATIC 768],const int16_t b[S2N_BIGNUM_STATIC 768],const int16_t bt[S2N_BIGNUM_STATIC 384]); | ||
1001 | |||
1002 | // Scalar product of 4-element polynomial vectors in NTT domain, with mulcache | ||
1003 | // Inputs a[1024], b[1024], bt[512] (signed 16-bit words); output r[256] (signed 16-bit words) | ||
1004 | extern void mlkem_basemul_k4(int16_t r[S2N_BIGNUM_STATIC 256],const int16_t a[S2N_BIGNUM_STATIC 1024],const int16_t b[S2N_BIGNUM_STATIC 1024],const int16_t bt[S2N_BIGNUM_STATIC 512]); | ||
1005 | |||
1006 | // Inverse number-theoretic transform from ML-KEM | ||
1007 | // Inputs a[256] (signed 16-bit words), z_01234[80] (signed 16-bit words), z_56[384] (signed 16-bit words); output a[256] (signed 16-bit words) | ||
1008 | extern void mlkem_intt(int16_t a[S2N_BIGNUM_STATIC 256],const int16_t z_01234[S2N_BIGNUM_STATIC 80],const int16_t z_56[S2N_BIGNUM_STATIC 384]); | ||
1009 | |||
1010 | // Precompute the mulcache data for a polynomial in the NTT domain | ||
1011 | // Inputs a[256], z[128] and t[128] (signed 16-bit words); output x[128] (signed 16-bit words) | ||
1012 | extern void mlkem_mulcache_compute(int16_t x[S2N_BIGNUM_STATIC 128],const int16_t a[S2N_BIGNUM_STATIC 256],const int16_t z[S2N_BIGNUM_STATIC 128],const int16_t t[S2N_BIGNUM_STATIC 128]); | ||
1013 | |||
1014 | // Forward number-theoretic transform from ML-KEM | ||
1015 | // Inputs a[256] (signed 16-bit words), z_01234[80] (signed 16-bit words), z_56[384] (signed 16-bit words); output a[256] (signed 16-bit words) | ||
1016 | extern void mlkem_ntt(int16_t a[S2N_BIGNUM_STATIC 256],const int16_t z_01234[S2N_BIGNUM_STATIC 80],const int16_t z_56[S2N_BIGNUM_STATIC 384]); | ||
1017 | |||
1018 | // Canonical modular reduction of polynomial coefficients for ML-KEM | ||
1019 | // Input a[256] (signed 16-bit words); output a[256] (signed 16-bit words) | ||
1020 | extern void mlkem_reduce(int16_t a[S2N_BIGNUM_STATIC 256]); | ||
1021 | |||
1022 | // Pack ML-KEM polynomial coefficients as 12-bit numbers | ||
1023 | // Input a[256] (signed 16-bit words); output r[384] (bytes) | ||
1024 | extern void mlkem_tobytes(uint8_t r[S2N_BIGNUM_STATIC 384],const int16_t a[S2N_BIGNUM_STATIC 256]); | ||
1025 | |||
1026 | // Conversion of ML-KEM polynomial coefficients to Montgomery form | ||
1027 | // Input a[256] (signed 16-bit words); output a[256] (signed 16-bit words) | ||
1028 | extern void mlkem_tomont(int16_t a[S2N_BIGNUM_STATIC 256]); | ||
1029 | |||
1030 | // Uniform rejection sampling for ML-KEM | ||
1031 | // Inputs *buf (unsigned bytes), buflen, table (unsigned bytes); output r[256] (signed 16-bit words), return | ||
1032 | extern uint64_t mlkem_rej_uniform_VARIABLE_TIME(int16_t r[S2N_BIGNUM_STATIC 256],const uint8_t *buf,uint64_t buflen,const uint8_t *table); | ||
781 | 1033 | ||
782 | // Point addition on NIST curve P-256 in Montgomery-Jacobian coordinates | 1034 | // Point addition on NIST curve P-256 in Montgomery-Jacobian coordinates |
783 | // Inputs p1[12], p2[12]; output p3[12] | 1035 | // Inputs p1[12], p2[12]; output p3[12] |
784 | extern void p256_montjadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 12]); | 1036 | extern void p256_montjadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); |
1037 | extern void p256_montjadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); | ||
785 | 1038 | ||
786 | // Point doubling on NIST curve P-256 in Montgomery-Jacobian coordinates | 1039 | // Point doubling on NIST curve P-256 in Montgomery-Jacobian coordinates |
787 | // Input p1[12]; output p3[12] | 1040 | // Input p1[12]; output p3[12] |
788 | extern void p256_montjdouble(uint64_t p3[static 12],uint64_t p1[static 12]); | 1041 | extern void p256_montjdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
1042 | extern void p256_montjdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); | ||
789 | 1043 | ||
790 | // Point mixed addition on NIST curve P-256 in Montgomery-Jacobian coordinates | 1044 | // Point mixed addition on NIST curve P-256 in Montgomery-Jacobian coordinates |
791 | // Inputs p1[12], p2[8]; output p3[12] | 1045 | // Inputs p1[12], p2[8]; output p3[12] |
792 | extern void p256_montjmixadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 8]); | 1046 | extern void p256_montjmixadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); |
1047 | extern void p256_montjmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); | ||
1048 | |||
1049 | // Montgomery-Jacobian form scalar multiplication for P-256 | ||
1050 | // Inputs scalar[4], point[12]; output res[12] | ||
1051 | extern void p256_montjscalarmul(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]); | ||
1052 | extern void p256_montjscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]); | ||
1053 | |||
1054 | // Scalar multiplication for NIST curve P-256 | ||
1055 | // Inputs scalar[4], point[8]; output res[8] | ||
1056 | extern void p256_scalarmul(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 8]); | ||
1057 | extern void p256_scalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 8]); | ||
1058 | |||
1059 | // Scalar multiplication for precomputed point on NIST curve P-256 | ||
1060 | // Inputs scalar[4], blocksize, table[]; output res[8] | ||
1061 | extern void p256_scalarmulbase(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],uint64_t blocksize,const uint64_t *table); | ||
1062 | extern void p256_scalarmulbase_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],uint64_t blocksize,const uint64_t *table); | ||
793 | 1063 | ||
794 | // Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates | 1064 | // Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates |
795 | // Inputs p1[18], p2[18]; output p3[18] | 1065 | // Inputs p1[18], p2[18]; output p3[18] |
796 | extern void p384_montjadd(uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 18]); | 1066 | extern void p384_montjadd(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 18]); |
1067 | extern void p384_montjadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 18]); | ||
797 | 1068 | ||
798 | // Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates | 1069 | // Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates |
799 | // Input p1[18]; output p3[18] | 1070 | // Input p1[18]; output p3[18] |
800 | extern void p384_montjdouble(uint64_t p3[static 18],uint64_t p1[static 18]); | 1071 | extern void p384_montjdouble(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18]); |
1072 | extern void p384_montjdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18]); | ||
801 | 1073 | ||
802 | // Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates | 1074 | // Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates |
803 | // Inputs p1[18], p2[12]; output p3[18] | 1075 | // Inputs p1[18], p2[12]; output p3[18] |
804 | extern void p384_montjmixadd(uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 12]); | 1076 | extern void p384_montjmixadd(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 12]); |
1077 | extern void p384_montjmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 12]); | ||
1078 | |||
1079 | // Montgomery-Jacobian form scalar multiplication for P-384 | ||
1080 | // Input scalar[6], point[18]; output res[18] | ||
1081 | extern void p384_montjscalarmul(uint64_t res[S2N_BIGNUM_STATIC 18],const uint64_t scalar[S2N_BIGNUM_STATIC 6],const uint64_t point[S2N_BIGNUM_STATIC 18]); | ||
1082 | extern void p384_montjscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 18],const uint64_t scalar[S2N_BIGNUM_STATIC 6],const uint64_t point[S2N_BIGNUM_STATIC 18]); | ||
805 | 1083 | ||
806 | // Point addition on NIST curve P-521 in Jacobian coordinates | 1084 | // Point addition on NIST curve P-521 in Jacobian coordinates |
807 | // Inputs p1[27], p2[27]; output p3[27] | 1085 | // Inputs p1[27], p2[27]; output p3[27] |
808 | extern void p521_jadd(uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 27]); | 1086 | extern void p521_jadd(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 27]); |
1087 | extern void p521_jadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 27]); | ||
809 | 1088 | ||
810 | // Point doubling on NIST curve P-521 in Jacobian coordinates | 1089 | // Point doubling on NIST curve P-521 in Jacobian coordinates |
811 | // Input p1[27]; output p3[27] | 1090 | // Input p1[27]; output p3[27] |
812 | extern void p521_jdouble(uint64_t p3[static 27],uint64_t p1[static 27]); | 1091 | extern void p521_jdouble(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27]); |
1092 | extern void p521_jdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27]); | ||
813 | 1093 | ||
814 | // Point mixed addition on NIST curve P-521 in Jacobian coordinates | 1094 | // Point mixed addition on NIST curve P-521 in Jacobian coordinates |
815 | // Inputs p1[27], p2[18]; output p3[27] | 1095 | // Inputs p1[27], p2[18]; output p3[27] |
816 | extern void p521_jmixadd(uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 18]); | 1096 | extern void p521_jmixadd(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 18]); |
1097 | extern void p521_jmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 18]); | ||
1098 | |||
1099 | // Jacobian form scalar multiplication for P-521 | ||
1100 | // Input scalar[9], point[27]; output res[27] | ||
1101 | extern void p521_jscalarmul(uint64_t res[S2N_BIGNUM_STATIC 27],const uint64_t scalar[S2N_BIGNUM_STATIC 9],const uint64_t point[S2N_BIGNUM_STATIC 27]); | ||
1102 | extern void p521_jscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 27],const uint64_t scalar[S2N_BIGNUM_STATIC 9],const uint64_t point[S2N_BIGNUM_STATIC 27]); | ||
817 | 1103 | ||
818 | // Point addition on SECG curve secp256k1 in Jacobian coordinates | 1104 | // Point addition on SECG curve secp256k1 in Jacobian coordinates |
819 | // Inputs p1[12], p2[12]; output p3[12] | 1105 | // Inputs p1[12], p2[12]; output p3[12] |
820 | extern void secp256k1_jadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 12]); | 1106 | extern void secp256k1_jadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); |
1107 | extern void secp256k1_jadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); | ||
821 | 1108 | ||
822 | // Point doubling on SECG curve secp256k1 in Jacobian coordinates | 1109 | // Point doubling on SECG curve secp256k1 in Jacobian coordinates |
823 | // Input p1[12]; output p3[12] | 1110 | // Input p1[12]; output p3[12] |
824 | extern void secp256k1_jdouble(uint64_t p3[static 12],uint64_t p1[static 12]); | 1111 | extern void secp256k1_jdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
1112 | extern void secp256k1_jdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); | ||
825 | 1113 | ||
826 | // Point mixed addition on SECG curve secp256k1 in Jacobian coordinates | 1114 | // Point mixed addition on SECG curve secp256k1 in Jacobian coordinates |
827 | // Inputs p1[12], p2[8]; output p3[12] | 1115 | // Inputs p1[12], p2[8]; output p3[12] |
828 | extern void secp256k1_jmixadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 8]); | 1116 | extern void secp256k1_jmixadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); |
1117 | extern void secp256k1_jmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); | ||
1118 | |||
1119 | // Keccak-f1600 permutation for SHA3 | ||
1120 | // Inputs a[25], rc[24]; output a[25] | ||
1121 | extern void sha3_keccak_f1600(uint64_t a[S2N_BIGNUM_STATIC 25],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
1122 | extern void sha3_keccak_f1600_alt(uint64_t a[S2N_BIGNUM_STATIC 25],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
1123 | |||
1124 | // Batched 2-way Keccak-f1600 permutation for SHA3 | ||
1125 | // Inputs a[50], rc[24]; output a[50] | ||
1126 | extern void sha3_keccak2_f1600(uint64_t a[S2N_BIGNUM_STATIC 50],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
1127 | extern void sha3_keccak2_f1600_alt(uint64_t a[S2N_BIGNUM_STATIC 50],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
1128 | |||
1129 | // Batched 4-way Keccak-f1600 permutation for SHA3 | ||
1130 | // Inputs a[100], rc[24]; output a[100] | ||
1131 | extern void sha3_keccak4_f1600(uint64_t a[S2N_BIGNUM_STATIC 100],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
1132 | extern void sha3_keccak4_f1600_alt(uint64_t a[S2N_BIGNUM_STATIC 100],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
1133 | extern void sha3_keccak4_f1600_alt2(uint64_t a[S2N_BIGNUM_STATIC 100],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
1134 | |||
1135 | // Point addition on CC curve SM2 in Montgomery-Jacobian coordinates | ||
1136 | // Inputs p1[12], p2[12]; output p3[12] | ||
1137 | extern void sm2_montjadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); | ||
1138 | extern void sm2_montjadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); | ||
1139 | |||
1140 | // Point doubling on CC curve SM2 in Montgomery-Jacobian coordinates | ||
1141 | // Input p1[12]; output p3[12] | ||
1142 | extern void sm2_montjdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); | ||
1143 | extern void sm2_montjdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); | ||
1144 | |||
1145 | // Point mixed addition on CC curve SM2 in Montgomery-Jacobian coordinates | ||
1146 | // Inputs p1[12], p2[8]; output p3[12] | ||
1147 | extern void sm2_montjmixadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); | ||
1148 | extern void sm2_montjmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); | ||
1149 | |||
1150 | // Montgomery-Jacobian form scalar multiplication for CC curve SM2 | ||
1151 | // Input scalar[4], point[12]; output res[12] | ||
1152 | extern void sm2_montjscalarmul(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]); | ||
1153 | extern void sm2_montjscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]); | ||
829 | 1154 | ||
830 | // Reverse the bytes in a single word | 1155 | // Reverse the bytes in a single word |
831 | // Input a; output function return | 1156 | // Input a; output function return |
@@ -839,6 +1164,10 @@ extern uint64_t word_clz (uint64_t a); | |||
839 | // Input a; output function return | 1164 | // Input a; output function return |
840 | extern uint64_t word_ctz (uint64_t a); | 1165 | extern uint64_t word_ctz (uint64_t a); |
841 | 1166 | ||
1167 | // Perform 59 "divstep" iterations and return signed matrix of updates | ||
1168 | // Inputs d, f, g; output m[2][2] and function return | ||
1169 | extern int64_t word_divstep59(int64_t m[2][2],int64_t d,uint64_t f,uint64_t g); | ||
1170 | |||
842 | // Return maximum of two unsigned 64-bit words | 1171 | // Return maximum of two unsigned 64-bit words |
843 | // Inputs a, b; output function return | 1172 | // Inputs a, b; output function return |
844 | extern uint64_t word_max (uint64_t a, uint64_t b); | 1173 | extern uint64_t word_max (uint64_t a, uint64_t b); |
@@ -851,6 +1180,10 @@ extern uint64_t word_min (uint64_t a, uint64_t b); | |||
851 | // Input a; output function return | 1180 | // Input a; output function return |
852 | extern uint64_t word_negmodinv (uint64_t a); | 1181 | extern uint64_t word_negmodinv (uint64_t a); |
853 | 1182 | ||
1183 | // Count number of set bits in a single 64-bit word (population count) | ||
1184 | // Input a; output function return | ||
1185 | extern uint64_t word_popcount (uint64_t a); | ||
1186 | |||
854 | // Single-word reciprocal, 2^64 + ret = ceil(2^128/a) - 1 if MSB of "a" is set | 1187 | // Single-word reciprocal, 2^64 + ret = ceil(2^128/a) - 1 if MSB of "a" is set |
855 | // Input a; output function return | 1188 | // Input a; output function return |
856 | extern uint64_t word_recip (uint64_t a); | 1189 | extern uint64_t word_recip (uint64_t a); |
diff --git a/src/lib/libcrypto/bn/s2n_bignum_internal.h b/src/lib/libcrypto/bn/s2n_bignum_internal.h index b82db7d019..37eebb4fd6 100644 --- a/src/lib/libcrypto/bn/s2n_bignum_internal.h +++ b/src/lib/libcrypto/bn/s2n_bignum_internal.h | |||
@@ -1,3 +1,5 @@ | |||
1 | // $OpenBSD: s2n_bignum_internal.h,v 1.5 2025/08/12 10:01:37 jsing Exp $ | ||
2 | // | ||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
2 | // | 4 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
@@ -14,14 +16,14 @@ | |||
14 | 16 | ||
15 | #ifdef __APPLE__ | 17 | #ifdef __APPLE__ |
16 | # define S2N_BN_SYMBOL(NAME) _##NAME | 18 | # define S2N_BN_SYMBOL(NAME) _##NAME |
19 | # if defined(__AARCH64EL__) || defined(__ARMEL__) | ||
20 | # define __LF %% | ||
21 | # else | ||
22 | # define __LF ; | ||
23 | # endif | ||
17 | #else | 24 | #else |
18 | # define S2N_BN_SYMBOL(name) name | 25 | # define S2N_BN_SYMBOL(name) name |
19 | #endif | 26 | # define __LF ; |
20 | |||
21 | #ifdef __CET__ | ||
22 | # include <cet.h> | ||
23 | #else | ||
24 | # define _CET_ENDBR | ||
25 | #endif | 27 | #endif |
26 | 28 | ||
27 | #define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl S2N_BN_SYMBOL(name) | 29 | #define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl S2N_BN_SYMBOL(name) |
@@ -34,3 +36,24 @@ | |||
34 | #else | 36 | #else |
35 | # define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ | 37 | # define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ |
36 | #endif | 38 | #endif |
39 | |||
40 | // Enable indirect branch tracking support unless explicitly disabled | ||
41 | // with -DNO_IBT. If the platform supports CET, simply inherit this from | ||
42 | // the usual header. Otherwise manually define _CET_ENDBR, used at each | ||
43 | // x86 entry point, to be the ENDBR64 instruction, with an explicit byte | ||
44 | // sequence for compilers/assemblers that don't know about it. Note that | ||
45 | // it is safe to use ENDBR64 on all platforms, since the encoding is by | ||
46 | // design interpreted as a NOP on all pre-CET x86_64 processors. The only | ||
47 | // downside is a small increase in code size and potentially a modest | ||
48 | // slowdown from executing one more instruction. | ||
49 | |||
50 | #if NO_IBT | ||
51 | # if defined(_CET_ENDBR) | ||
52 | # error "The s2n-bignum build option NO_IBT was configured, but _CET_ENDBR is defined in this compilation unit. That is weird, so failing the build." | ||
53 | # endif | ||
54 | # define _CET_ENDBR | ||
55 | #elif defined(__CET__) | ||
56 | # include <cet.h> | ||
57 | #elif !defined(_CET_ENDBR) | ||
58 | # define _CET_ENDBR .byte 0xf3,0x0f,0x1e,0xfa | ||
59 | #endif | ||