diff options
Diffstat (limited to 'src/lib/libcrypto/bn')
56 files changed, 3827 insertions, 743 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S index 5fe4aae7a1..1d4e6d08ef 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: bignum_add.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,9 +18,8 @@ | |||
| 16 | // Add, z := x + y | 18 | // Add, z := x + y |
| 17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 19 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
| 18 | // | 20 | // |
| 19 | // extern uint64_t bignum_add | 21 | // extern uint64_t bignum_add(uint64_t p, uint64_t *z, uint64_t m, |
| 20 | // (uint64_t p, uint64_t *z, | 22 | // const uint64_t *x, uint64_t n, const uint64_t *y); |
| 21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
| 22 | // | 23 | // |
| 23 | // Does the z := x + y operation, truncating modulo p words in general and | 24 | // Does the z := x + y operation, truncating modulo p words in general and |
| 24 | // returning a top carry (0 or 1) in the p'th place, only adding the input | 25 | // returning a top carry (0 or 1) in the p'th place, only adding the input |
| @@ -49,7 +50,7 @@ | |||
| 49 | 50 | ||
| 50 | 51 | ||
| 51 | S2N_BN_SYMBOL(bignum_add): | 52 | S2N_BN_SYMBOL(bignum_add): |
| 52 | _CET_ENDBR | 53 | _CET_ENDBR |
| 53 | 54 | ||
| 54 | #if WINDOWS_ABI | 55 | #if WINDOWS_ABI |
| 55 | push rdi | 56 | push rdi |
| @@ -75,7 +76,7 @@ S2N_BN_SYMBOL(bignum_add): | |||
| 75 | cmp p, n | 76 | cmp p, n |
| 76 | cmovc n, p | 77 | cmovc n, p |
| 77 | cmp m, n | 78 | cmp m, n |
| 78 | jc ylonger | 79 | jc bignum_add_ylonger |
| 79 | 80 | ||
| 80 | // The case where x is longer or of the same size (p >= m >= n) | 81 | // The case where x is longer or of the same size (p >= m >= n) |
| 81 | 82 | ||
| @@ -83,27 +84,27 @@ S2N_BN_SYMBOL(bignum_add): | |||
| 83 | sub m, n | 84 | sub m, n |
| 84 | inc m | 85 | inc m |
| 85 | test n, n | 86 | test n, n |
| 86 | jz xtest | 87 | jz bignum_add_xtest |
| 87 | xmainloop: | 88 | bignum_add_xmainloop: |
| 88 | mov a, [x+8*i] | 89 | mov a, [x+8*i] |
| 89 | adc a, [y+8*i] | 90 | adc a, [y+8*i] |
| 90 | mov [z+8*i],a | 91 | mov [z+8*i],a |
| 91 | inc i | 92 | inc i |
| 92 | dec n | 93 | dec n |
| 93 | jnz xmainloop | 94 | jnz bignum_add_xmainloop |
| 94 | jmp xtest | 95 | jmp bignum_add_xtest |
| 95 | xtoploop: | 96 | bignum_add_xtoploop: |
| 96 | mov a, [x+8*i] | 97 | mov a, [x+8*i] |
| 97 | adc a, 0 | 98 | adc a, 0 |
| 98 | mov [z+8*i],a | 99 | mov [z+8*i],a |
| 99 | inc i | 100 | inc i |
| 100 | xtest: | 101 | bignum_add_xtest: |
| 101 | dec m | 102 | dec m |
| 102 | jnz xtoploop | 103 | jnz bignum_add_xtoploop |
| 103 | mov ashort, 0 | 104 | mov ashort, 0 |
| 104 | adc a, 0 | 105 | adc a, 0 |
| 105 | test p, p | 106 | test p, p |
| 106 | jnz tails | 107 | jnz bignum_add_tails |
| 107 | #if WINDOWS_ABI | 108 | #if WINDOWS_ABI |
| 108 | pop rsi | 109 | pop rsi |
| 109 | pop rdi | 110 | pop rdi |
| @@ -112,30 +113,30 @@ xtest: | |||
| 112 | 113 | ||
| 113 | // The case where y is longer (p >= n > m) | 114 | // The case where y is longer (p >= n > m) |
| 114 | 115 | ||
| 115 | ylonger: | 116 | bignum_add_ylonger: |
| 116 | 117 | ||
| 117 | sub p, n | 118 | sub p, n |
| 118 | sub n, m | 119 | sub n, m |
| 119 | test m, m | 120 | test m, m |
| 120 | jz ytoploop | 121 | jz bignum_add_ytoploop |
| 121 | ymainloop: | 122 | bignum_add_ymainloop: |
| 122 | mov a, [x+8*i] | 123 | mov a, [x+8*i] |
| 123 | adc a, [y+8*i] | 124 | adc a, [y+8*i] |
| 124 | mov [z+8*i],a | 125 | mov [z+8*i],a |
| 125 | inc i | 126 | inc i |
| 126 | dec m | 127 | dec m |
| 127 | jnz ymainloop | 128 | jnz bignum_add_ymainloop |
| 128 | ytoploop: | 129 | bignum_add_ytoploop: |
| 129 | mov a, [y+8*i] | 130 | mov a, [y+8*i] |
| 130 | adc a, 0 | 131 | adc a, 0 |
| 131 | mov [z+8*i],a | 132 | mov [z+8*i],a |
| 132 | inc i | 133 | inc i |
| 133 | dec n | 134 | dec n |
| 134 | jnz ytoploop | 135 | jnz bignum_add_ytoploop |
| 135 | mov ashort, 0 | 136 | mov ashort, 0 |
| 136 | adc a, 0 | 137 | adc a, 0 |
| 137 | test p, p | 138 | test p, p |
| 138 | jnz tails | 139 | jnz bignum_add_tails |
| 139 | #if WINDOWS_ABI | 140 | #if WINDOWS_ABI |
| 140 | pop rsi | 141 | pop rsi |
| 141 | pop rdi | 142 | pop rdi |
| @@ -144,16 +145,16 @@ ytoploop: | |||
| 144 | 145 | ||
| 145 | // Adding a non-trivial tail, when p > max(m,n) | 146 | // Adding a non-trivial tail, when p > max(m,n) |
| 146 | 147 | ||
| 147 | tails: | 148 | bignum_add_tails: |
| 148 | mov [z+8*i],a | 149 | mov [z+8*i],a |
| 149 | xor a, a | 150 | xor a, a |
| 150 | jmp tail | 151 | jmp bignum_add_tail |
| 151 | tailloop: | 152 | bignum_add_tailloop: |
| 152 | mov [z+8*i],a | 153 | mov [z+8*i],a |
| 153 | tail: | 154 | bignum_add_tail: |
| 154 | inc i | 155 | inc i |
| 155 | dec p | 156 | dec p |
| 156 | jnz tailloop | 157 | jnz bignum_add_tailloop |
| 157 | #if WINDOWS_ABI | 158 | #if WINDOWS_ABI |
| 158 | pop rsi | 159 | pop rsi |
| 159 | pop rdi | 160 | pop rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S index 25ba17bce2..a611919603 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: bignum_cmadd.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,8 +18,8 @@ | |||
| 16 | // Multiply-add with single-word multiplier, z := z + c * y | 18 | // Multiply-add with single-word multiplier, z := z + c * y |
| 17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 19 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
| 18 | // | 20 | // |
| 19 | // extern uint64_t bignum_cmadd | 21 | // extern uint64_t bignum_cmadd(uint64_t k, uint64_t *z, uint64_t c, uint64_t n, |
| 20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 22 | // const uint64_t *y); |
| 21 | // | 23 | // |
| 22 | // Does the "z := z + c * y" operation where y is n digits, result z is k. | 24 | // Does the "z := z + c * y" operation where y is n digits, result z is k. |
| 23 | // Truncates the result in general. | 25 | // Truncates the result in general. |
| @@ -54,7 +56,7 @@ | |||
| 54 | 56 | ||
| 55 | 57 | ||
| 56 | S2N_BN_SYMBOL(bignum_cmadd): | 58 | S2N_BN_SYMBOL(bignum_cmadd): |
| 57 | _CET_ENDBR | 59 | _CET_ENDBR |
| 58 | 60 | ||
| 59 | #if WINDOWS_ABI | 61 | #if WINDOWS_ABI |
| 60 | push rdi | 62 | push rdi |
| @@ -82,7 +84,7 @@ S2N_BN_SYMBOL(bignum_cmadd): | |||
| 82 | 84 | ||
| 83 | xor h, h | 85 | xor h, h |
| 84 | test n, n | 86 | test n, n |
| 85 | jz end | 87 | jz bignum_cmadd_end |
| 86 | 88 | ||
| 87 | // Move c into a safer register as multiplies overwrite rdx | 89 | // Move c into a safer register as multiplies overwrite rdx |
| 88 | 90 | ||
| @@ -96,11 +98,11 @@ S2N_BN_SYMBOL(bignum_cmadd): | |||
| 96 | mov h, rdx | 98 | mov h, rdx |
| 97 | mov ishort, 1 | 99 | mov ishort, 1 |
| 98 | dec n | 100 | dec n |
| 99 | jz hightail | 101 | jz bignum_cmadd_hightail |
| 100 | 102 | ||
| 101 | // Main loop, where we always have CF + previous high part h to add in | 103 | // Main loop, where we always have CF + previous high part h to add in |
| 102 | 104 | ||
| 103 | loop: | 105 | bignum_cmadd_loop: |
| 104 | adc h, [z+8*i] | 106 | adc h, [z+8*i] |
| 105 | sbb r, r | 107 | sbb r, r |
| 106 | mov rax, [x+8*i] | 108 | mov rax, [x+8*i] |
| @@ -111,36 +113,36 @@ loop: | |||
| 111 | mov h, rdx | 113 | mov h, rdx |
| 112 | inc i | 114 | inc i |
| 113 | dec n | 115 | dec n |
| 114 | jnz loop | 116 | jnz bignum_cmadd_loop |
| 115 | 117 | ||
| 116 | hightail: | 118 | bignum_cmadd_hightail: |
| 117 | adc h, 0 | 119 | adc h, 0 |
| 118 | 120 | ||
| 119 | // Propagate the carry all the way to the end with h as extra carry word | 121 | // Propagate the carry all the way to the end with h as extra carry word |
| 120 | 122 | ||
| 121 | tail: | 123 | bignum_cmadd_tail: |
| 122 | test p, p | 124 | test p, p |
| 123 | jz end | 125 | jz bignum_cmadd_end |
| 124 | 126 | ||
| 125 | add [z+8*i], h | 127 | add [z+8*i], h |
| 126 | mov hshort, 0 | 128 | mov hshort, 0 |
| 127 | inc i | 129 | inc i |
| 128 | dec p | 130 | dec p |
| 129 | jz highend | 131 | jz bignum_cmadd_highend |
| 130 | 132 | ||
| 131 | tloop: | 133 | bignum_cmadd_tloop: |
| 132 | adc [z+8*i], h | 134 | adc [z+8*i], h |
| 133 | inc i | 135 | inc i |
| 134 | dec p | 136 | dec p |
| 135 | jnz tloop | 137 | jnz bignum_cmadd_tloop |
| 136 | 138 | ||
| 137 | highend: | 139 | bignum_cmadd_highend: |
| 138 | 140 | ||
| 139 | adc h, 0 | 141 | adc h, 0 |
| 140 | 142 | ||
| 141 | // Return the high/carry word | 143 | // Return the high/carry word |
| 142 | 144 | ||
| 143 | end: | 145 | bignum_cmadd_end: |
| 144 | mov rax, h | 146 | mov rax, h |
| 145 | 147 | ||
| 146 | pop rbx | 148 | pop rbx |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S index 12f785d63a..eb71d9da44 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: bignum_cmul.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,8 +18,8 @@ | |||
| 16 | // Multiply by a single word, z := c * y | 18 | // Multiply by a single word, z := c * y |
| 17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 19 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
| 18 | // | 20 | // |
| 19 | // extern uint64_t bignum_cmul | 21 | // extern uint64_t bignum_cmul(uint64_t k, uint64_t *z, uint64_t c, uint64_t n, |
| 20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 22 | // const uint64_t *y); |
| 21 | // | 23 | // |
| 22 | // Does the "z := c * y" operation where y is n digits, result z is k. | 24 | // Does the "z := c * y" operation where y is n digits, result z is k. |
| 23 | // Truncates the result in general unless k >= n + 1. | 25 | // Truncates the result in general unless k >= n + 1. |
| @@ -51,7 +53,7 @@ | |||
| 51 | 53 | ||
| 52 | 54 | ||
| 53 | S2N_BN_SYMBOL(bignum_cmul): | 55 | S2N_BN_SYMBOL(bignum_cmul): |
| 54 | _CET_ENDBR | 56 | _CET_ENDBR |
| 55 | 57 | ||
| 56 | #if WINDOWS_ABI | 58 | #if WINDOWS_ABI |
| 57 | push rdi | 59 | push rdi |
| @@ -76,7 +78,7 @@ S2N_BN_SYMBOL(bignum_cmul): | |||
| 76 | xor h, h | 78 | xor h, h |
| 77 | xor i, i | 79 | xor i, i |
| 78 | test n, n | 80 | test n, n |
| 79 | jz tail | 81 | jz bignum_cmul_tail |
| 80 | 82 | ||
| 81 | // Move c into a safer register as multiplies overwrite rdx | 83 | // Move c into a safer register as multiplies overwrite rdx |
| 82 | 84 | ||
| @@ -90,11 +92,11 @@ S2N_BN_SYMBOL(bignum_cmul): | |||
| 90 | mov h, rdx | 92 | mov h, rdx |
| 91 | inc i | 93 | inc i |
| 92 | cmp i, n | 94 | cmp i, n |
| 93 | jz tail | 95 | jz bignum_cmul_tail |
| 94 | 96 | ||
| 95 | // Main loop doing the multiplications | 97 | // Main loop doing the multiplications |
| 96 | 98 | ||
| 97 | loop: | 99 | bignum_cmul_loop: |
| 98 | mov rax, [x+8*i] | 100 | mov rax, [x+8*i] |
| 99 | mul c | 101 | mul c |
| 100 | add rax, h | 102 | add rax, h |
| @@ -103,28 +105,28 @@ loop: | |||
| 103 | mov h, rdx | 105 | mov h, rdx |
| 104 | inc i | 106 | inc i |
| 105 | cmp i, n | 107 | cmp i, n |
| 106 | jc loop | 108 | jc bignum_cmul_loop |
| 107 | 109 | ||
| 108 | // Add a tail when the destination is longer | 110 | // Add a tail when the destination is longer |
| 109 | 111 | ||
| 110 | tail: | 112 | bignum_cmul_tail: |
| 111 | cmp i, p | 113 | cmp i, p |
| 112 | jnc end | 114 | jnc bignum_cmul_end |
| 113 | mov [z+8*i], h | 115 | mov [z+8*i], h |
| 114 | xor h, h | 116 | xor h, h |
| 115 | inc i | 117 | inc i |
| 116 | cmp i, p | 118 | cmp i, p |
| 117 | jnc end | 119 | jnc bignum_cmul_end |
| 118 | 120 | ||
| 119 | tloop: | 121 | bignum_cmul_tloop: |
| 120 | mov [z+8*i], h | 122 | mov [z+8*i], h |
| 121 | inc i | 123 | inc i |
| 122 | cmp i, p | 124 | cmp i, p |
| 123 | jc tloop | 125 | jc bignum_cmul_tloop |
| 124 | 126 | ||
| 125 | // Return the high/carry word | 127 | // Return the high/carry word |
| 126 | 128 | ||
| 127 | end: | 129 | bignum_cmul_end: |
| 128 | mov rax, h | 130 | mov rax, h |
| 129 | 131 | ||
| 130 | #if WINDOWS_ABI | 132 | #if WINDOWS_ABI |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_modadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_modadd.S new file mode 100644 index 0000000000..baf27fdc7f --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_modadd.S | |||
| @@ -0,0 +1,112 @@ | |||
| 1 | // $OpenBSD: bignum_modadd.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
| 2 | // | ||
| 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 4 | // | ||
| 5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 6 | // purpose with or without fee is hereby granted, provided that the above | ||
| 7 | // copyright notice and this permission notice appear in all copies. | ||
| 8 | // | ||
| 9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | |||
| 17 | // ---------------------------------------------------------------------------- | ||
| 18 | // Add modulo m, z := (x + y) mod m, assuming x and y reduced | ||
| 19 | // Inputs x[k], y[k], m[k]; output z[k] | ||
| 20 | // | ||
| 21 | // extern void bignum_modadd(uint64_t k, uint64_t *z, const uint64_t *x, | ||
| 22 | // const uint64_t *y, const uint64_t *m); | ||
| 23 | // | ||
| 24 | // Standard x86-64 ABI: RDI = k, RSI = z, RDX = x, RCX = y, R8 = m | ||
| 25 | // Microsoft x64 ABI: RCX = k, RDX = z, R8 = x, R9 = y, [RSP+40] = m | ||
| 26 | // ---------------------------------------------------------------------------- | ||
| 27 | |||
| 28 | #include "s2n_bignum_internal.h" | ||
| 29 | |||
| 30 | .intel_syntax noprefix | ||
| 31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_modadd) | ||
| 32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_modadd) | ||
| 33 | .text | ||
| 34 | |||
| 35 | #define k rdi | ||
| 36 | #define z rsi | ||
| 37 | #define x rdx | ||
| 38 | #define y rcx | ||
| 39 | #define m r8 | ||
| 40 | #define i r9 | ||
| 41 | #define j r10 | ||
| 42 | #define a rax | ||
| 43 | #define c r11 | ||
| 44 | |||
| 45 | S2N_BN_SYMBOL(bignum_modadd): | ||
| 46 | _CET_ENDBR | ||
| 47 | |||
| 48 | #if WINDOWS_ABI | ||
| 49 | push rdi | ||
| 50 | push rsi | ||
| 51 | mov rdi, rcx | ||
| 52 | mov rsi, rdx | ||
| 53 | mov rdx, r8 | ||
| 54 | mov rcx, r9 | ||
| 55 | mov r8, [rsp+56] | ||
| 56 | #endif | ||
| 57 | |||
| 58 | // If k = 0 do nothing | ||
| 59 | |||
| 60 | test k, k | ||
| 61 | jz bignum_modadd_end | ||
| 62 | |||
| 63 | // First just add (c::z) := x + y | ||
| 64 | |||
| 65 | xor c, c | ||
| 66 | mov j, k | ||
| 67 | xor i, i | ||
| 68 | bignum_modadd_addloop: | ||
| 69 | mov a, [x+8*i] | ||
| 70 | adc a, [y+8*i] | ||
| 71 | mov [z+8*i], a | ||
| 72 | inc i | ||
| 73 | dec j | ||
| 74 | jnz bignum_modadd_addloop | ||
| 75 | adc c, 0 | ||
| 76 | |||
| 77 | // Now do a comparison subtraction (c::z) - m, recording mask for (c::z) >= m | ||
| 78 | |||
| 79 | mov j, k | ||
| 80 | xor i, i | ||
| 81 | bignum_modadd_cmploop: | ||
| 82 | mov a, [z+8*i] | ||
| 83 | sbb a, [m+8*i] | ||
| 84 | inc i | ||
| 85 | dec j | ||
| 86 | jnz bignum_modadd_cmploop | ||
| 87 | sbb c, 0 | ||
| 88 | not c | ||
| 89 | |||
| 90 | // Now do a masked subtraction z := z - [c] * m | ||
| 91 | |||
| 92 | xor i, i | ||
| 93 | bignum_modadd_subloop: | ||
| 94 | mov a, [m+8*i] | ||
| 95 | and a, c | ||
| 96 | neg j | ||
| 97 | sbb [z+8*i], a | ||
| 98 | sbb j, j | ||
| 99 | inc i | ||
| 100 | cmp i, k | ||
| 101 | jc bignum_modadd_subloop | ||
| 102 | |||
| 103 | bignum_modadd_end: | ||
| 104 | #if WINDOWS_ABI | ||
| 105 | pop rsi | ||
| 106 | pop rdi | ||
| 107 | #endif | ||
| 108 | ret | ||
| 109 | |||
| 110 | #if defined(__linux__) && defined(__ELF__) | ||
| 111 | .section .note.GNU-stack,"",%progbits | ||
| 112 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_modsub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_modsub.S new file mode 100644 index 0000000000..63b3230e35 --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_modsub.S | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | // $OpenBSD: bignum_modsub.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
| 2 | // | ||
| 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 4 | // | ||
| 5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 6 | // purpose with or without fee is hereby granted, provided that the above | ||
| 7 | // copyright notice and this permission notice appear in all copies. | ||
| 8 | // | ||
| 9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | |||
| 17 | // ---------------------------------------------------------------------------- | ||
| 18 | // Subtract modulo m, z := (x - y) mod m, assuming x and y reduced | ||
| 19 | // Inputs x[k], y[k], m[k]; output z[k] | ||
| 20 | // | ||
| 21 | // extern void bignum_modsub(uint64_t k, uint64_t *z, const uint64_t *x, | ||
| 22 | // const uint64_t *y, const uint64_t *m); | ||
| 23 | // | ||
| 24 | // Standard x86-64 ABI: RDI = k, RSI = z, RDX = x, RCX = y, R8 = m | ||
| 25 | // Microsoft x64 ABI: RCX = k, RDX = z, R8 = x, R9 = y, [RSP+40] = m | ||
| 26 | // ---------------------------------------------------------------------------- | ||
| 27 | |||
| 28 | #include "s2n_bignum_internal.h" | ||
| 29 | |||
| 30 | .intel_syntax noprefix | ||
| 31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_modsub) | ||
| 32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_modsub) | ||
| 33 | .text | ||
| 34 | |||
| 35 | #define k rdi | ||
| 36 | #define z rsi | ||
| 37 | #define x rdx | ||
| 38 | #define y rcx | ||
| 39 | #define m r8 | ||
| 40 | #define i r9 | ||
| 41 | #define j r10 | ||
| 42 | #define a rax | ||
| 43 | #define c r11 | ||
| 44 | |||
| 45 | S2N_BN_SYMBOL(bignum_modsub): | ||
| 46 | _CET_ENDBR | ||
| 47 | |||
| 48 | #if WINDOWS_ABI | ||
| 49 | push rdi | ||
| 50 | push rsi | ||
| 51 | mov rdi, rcx | ||
| 52 | mov rsi, rdx | ||
| 53 | mov rdx, r8 | ||
| 54 | mov rcx, r9 | ||
| 55 | mov r8, [rsp+56] | ||
| 56 | #endif | ||
| 57 | |||
| 58 | // If k = 0 do nothing | ||
| 59 | |||
| 60 | test k, k | ||
| 61 | jz bignum_modsub_end | ||
| 62 | |||
| 63 | // Subtract z := x - y and record a mask for the carry x - y < 0 | ||
| 64 | |||
| 65 | xor c, c | ||
| 66 | mov j, k | ||
| 67 | xor i, i | ||
| 68 | bignum_modsub_subloop: | ||
| 69 | mov a, [x+8*i] | ||
| 70 | sbb a, [y+8*i] | ||
| 71 | mov [z+8*i], a | ||
| 72 | inc i | ||
| 73 | dec j | ||
| 74 | jnz bignum_modsub_subloop | ||
| 75 | sbb c, c | ||
| 76 | |||
| 77 | // Now do a masked addition z := z + [c] * m | ||
| 78 | |||
| 79 | xor i, i | ||
| 80 | bignum_modsub_addloop: | ||
| 81 | mov a, [m+8*i] | ||
| 82 | and a, c | ||
| 83 | neg j | ||
| 84 | adc [z+8*i], a | ||
| 85 | sbb j, j | ||
| 86 | inc i | ||
| 87 | cmp i, k | ||
| 88 | jc bignum_modsub_addloop | ||
| 89 | |||
| 90 | bignum_modsub_end: | ||
| 91 | #if WINDOWS_ABI | ||
| 92 | pop rsi | ||
| 93 | pop rdi | ||
| 94 | #endif | ||
| 95 | ret | ||
| 96 | |||
| 97 | #if defined(__linux__) && defined(__ELF__) | ||
| 98 | .section .note.GNU-stack,"",%progbits | ||
| 99 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S index a3552679a2..538cce9af7 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: bignum_mul.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,9 +18,8 @@ | |||
| 16 | // Multiply z := x * y | 18 | // Multiply z := x * y |
| 17 | // Inputs x[m], y[n]; output z[k] | 19 | // Inputs x[m], y[n]; output z[k] |
| 18 | // | 20 | // |
| 19 | // extern void bignum_mul | 21 | // extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, |
| 20 | // (uint64_t k, uint64_t *z, | 22 | // uint64_t n, const uint64_t *y); |
| 21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
| 22 | // | 23 | // |
| 23 | // Does the "z := x * y" operation where x is m digits, y is n, result z is k. | 24 | // Does the "z := x * y" operation where x is m digits, y is n, result z is k. |
| 24 | // Truncates the result in general unless k >= m + n | 25 | // Truncates the result in general unless k >= m + n |
| @@ -59,7 +60,7 @@ | |||
| 59 | 60 | ||
| 60 | 61 | ||
| 61 | S2N_BN_SYMBOL(bignum_mul): | 62 | S2N_BN_SYMBOL(bignum_mul): |
| 62 | _CET_ENDBR | 63 | _CET_ENDBR |
| 63 | 64 | ||
| 64 | #if WINDOWS_ABI | 65 | #if WINDOWS_ABI |
| 65 | push rdi | 66 | push rdi |
| @@ -88,7 +89,7 @@ S2N_BN_SYMBOL(bignum_mul): | |||
| 88 | // If we did a multiply-add variant, however, then we could | 89 | // If we did a multiply-add variant, however, then we could |
| 89 | 90 | ||
| 90 | test p, p | 91 | test p, p |
| 91 | jz end | 92 | jz bignum_mul_end |
| 92 | 93 | ||
| 93 | // Set initial 2-part sum to zero (we zero c inside the body) | 94 | // Set initial 2-part sum to zero (we zero c inside the body) |
| 94 | 95 | ||
| @@ -99,7 +100,7 @@ S2N_BN_SYMBOL(bignum_mul): | |||
| 99 | 100 | ||
| 100 | xor k, k | 101 | xor k, k |
| 101 | 102 | ||
| 102 | outerloop: | 103 | bignum_mul_outerloop: |
| 103 | 104 | ||
| 104 | // Zero our carry term first; we eventually want it and a zero is useful now | 105 | // Zero our carry term first; we eventually want it and a zero is useful now |
| 105 | // Set a = max 0 (k + 1 - n), i = min (k + 1) m | 106 | // Set a = max 0 (k + 1 - n), i = min (k + 1) m |
| @@ -125,11 +126,11 @@ outerloop: | |||
| 125 | mov d, k | 126 | mov d, k |
| 126 | sub d, i | 127 | sub d, i |
| 127 | sub i, a | 128 | sub i, a |
| 128 | jbe innerend | 129 | jbe bignum_mul_innerend |
| 129 | lea x,[rcx+8*a] | 130 | lea x,[rcx+8*a] |
| 130 | lea y,[r9+8*d-8] | 131 | lea y,[r9+8*d-8] |
| 131 | 132 | ||
| 132 | innerloop: | 133 | bignum_mul_innerloop: |
| 133 | mov rax, [y+8*i] | 134 | mov rax, [y+8*i] |
| 134 | mul QWORD PTR [x] | 135 | mul QWORD PTR [x] |
| 135 | add x, 8 | 136 | add x, 8 |
| @@ -137,9 +138,9 @@ innerloop: | |||
| 137 | adc h, rdx | 138 | adc h, rdx |
| 138 | adc c, 0 | 139 | adc c, 0 |
| 139 | dec i | 140 | dec i |
| 140 | jnz innerloop | 141 | jnz bignum_mul_innerloop |
| 141 | 142 | ||
| 142 | innerend: | 143 | bignum_mul_innerend: |
| 143 | 144 | ||
| 144 | mov [z], l | 145 | mov [z], l |
| 145 | mov l, h | 146 | mov l, h |
| @@ -147,9 +148,9 @@ innerend: | |||
| 147 | add z, 8 | 148 | add z, 8 |
| 148 | 149 | ||
| 149 | cmp k, p | 150 | cmp k, p |
| 150 | jc outerloop | 151 | jc bignum_mul_outerloop |
| 151 | 152 | ||
| 152 | end: | 153 | bignum_mul_end: |
| 153 | pop r15 | 154 | pop r15 |
| 154 | pop r14 | 155 | pop r14 |
| 155 | pop r13 | 156 | pop r13 |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8.S new file mode 100644 index 0000000000..d6ad514020 --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8.S | |||
| @@ -0,0 +1,187 @@ | |||
| 1 | // $OpenBSD: bignum_mul_4_8.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
| 2 | // | ||
| 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 4 | // | ||
| 5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 6 | // purpose with or without fee is hereby granted, provided that the above | ||
| 7 | // copyright notice and this permission notice appear in all copies. | ||
| 8 | // | ||
| 9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | |||
| 17 | // ---------------------------------------------------------------------------- | ||
| 18 | // Multiply z := x * y | ||
| 19 | // Inputs x[4], y[4]; output z[8] | ||
| 20 | // | ||
| 21 | // extern void bignum_mul_4_8(uint64_t z[static 8], const uint64_t x[static 4], | ||
| 22 | // const uint64_t y[static 4]); | ||
| 23 | // | ||
| 24 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | ||
| 25 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | ||
| 26 | // ---------------------------------------------------------------------------- | ||
| 27 | |||
| 28 | #include "s2n_bignum_internal.h" | ||
| 29 | |||
| 30 | .intel_syntax noprefix | ||
| 31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_4_8) | ||
| 32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_4_8) | ||
| 33 | .text | ||
| 34 | |||
| 35 | // These already sit in the registers where the standard ABI passes them | ||
| 36 | |||
| 37 | #define z rdi | ||
| 38 | #define x rsi | ||
| 39 | |||
| 40 | // Copied in or set up | ||
| 41 | |||
| 42 | #define y rcx | ||
| 43 | |||
| 44 | // A zero register | ||
| 45 | |||
| 46 | #define zero rbp | ||
| 47 | #define zeroe ebp | ||
| 48 | |||
| 49 | // Add in x[i] * rdx to the (i,i+1) position with the register window | ||
| 50 | // Would be nice to have conditional expressions reg[i], reg[i+1] ... | ||
| 51 | |||
| 52 | .macro mulpadd arg1,arg2 | ||
| 53 | mulx rbx, rax, [x+8*\arg2] | ||
| 54 | .if ((\arg1 + \arg2) % 4 == 0) | ||
| 55 | adcx r8, rax | ||
| 56 | adox r9, rbx | ||
| 57 | .elseif ((\arg1 + \arg2) % 4 == 1) | ||
| 58 | adcx r9, rax | ||
| 59 | adox r10, rbx | ||
| 60 | .elseif ((\arg1 + \arg2) % 4 == 2) | ||
| 61 | adcx r10, rax | ||
| 62 | adox r11, rbx | ||
| 63 | .elseif ((\arg1 + \arg2) % 4 == 3) | ||
| 64 | adcx r11, rax | ||
| 65 | adox r8, rbx | ||
| 66 | .endif | ||
| 67 | |||
| 68 | .endm | ||
| 69 | |||
| 70 | |||
| 71 | // Add in the whole j'th row | ||
| 72 | |||
| 73 | .macro addrow arg1 | ||
| 74 | mov rdx, [y+8*\arg1] | ||
| 75 | xor zeroe, zeroe | ||
| 76 | |||
| 77 | mulpadd \arg1, 0 | ||
| 78 | |||
| 79 | .if (\arg1 % 4 == 0) | ||
| 80 | mov [z+8*\arg1],r8 | ||
| 81 | .elseif (\arg1 % 4 == 1) | ||
| 82 | mov [z+8*\arg1],r9 | ||
| 83 | .elseif (\arg1 % 4 == 2) | ||
| 84 | mov [z+8*\arg1],r10 | ||
| 85 | .elseif (\arg1 % 4 == 3) | ||
| 86 | mov [z+8*\arg1],r11 | ||
| 87 | .endif | ||
| 88 | |||
| 89 | mulpadd \arg1, 1 | ||
| 90 | mulpadd \arg1, 2 | ||
| 91 | |||
| 92 | .if (\arg1 % 4 == 0) | ||
| 93 | mulx r8, rax, [x+24] | ||
| 94 | adcx r11, rax | ||
| 95 | adox r8, zero | ||
| 96 | adcx r8, zero | ||
| 97 | .elseif (\arg1 % 4 == 1) | ||
| 98 | mulx r9, rax, [x+24] | ||
| 99 | adcx r8, rax | ||
| 100 | adox r9, zero | ||
| 101 | adcx r9, zero | ||
| 102 | .elseif (\arg1 % 4 == 2) | ||
| 103 | mulx r10, rax, [x+24] | ||
| 104 | adcx r9, rax | ||
| 105 | adox r10, zero | ||
| 106 | adcx r10, zero | ||
| 107 | .elseif (\arg1 % 4 == 3) | ||
| 108 | mulx r11, rax, [x+24] | ||
| 109 | adcx r10, rax | ||
| 110 | adox r11, zero | ||
| 111 | adcx r11, zero | ||
| 112 | .endif | ||
| 113 | |||
| 114 | .endm | ||
| 115 | |||
| 116 | |||
| 117 | |||
| 118 | S2N_BN_SYMBOL(bignum_mul_4_8): | ||
| 119 | _CET_ENDBR | ||
| 120 | |||
| 121 | #if WINDOWS_ABI | ||
| 122 | push rdi | ||
| 123 | push rsi | ||
| 124 | mov rdi, rcx | ||
| 125 | mov rsi, rdx | ||
| 126 | mov rdx, r8 | ||
| 127 | #endif | ||
| 128 | |||
| 129 | // Save more registers to play with | ||
| 130 | |||
| 131 | push rbp | ||
| 132 | push rbx | ||
| 133 | |||
| 134 | // Copy y into a safe register to start with | ||
| 135 | |||
| 136 | mov y, rdx | ||
| 137 | |||
| 138 | // Zero a register, which also makes sure we don't get a fake carry-in | ||
| 139 | |||
| 140 | xor zeroe, zeroe | ||
| 141 | |||
| 142 | // Do the zeroth row, which is a bit different | ||
| 143 | // Write back the zero-zero product and then accumulate | ||
| 144 | // [r8,r11,r10,r9] (most significant first) as words 1..4 of y[0] * x | ||
| 145 | |||
| 146 | mov rdx, [y] | ||
| 147 | |||
| 148 | mulx r9, r8, [x] | ||
| 149 | mov [z], r8 | ||
| 150 | |||
| 151 | mulx r10, rbx, [x+8] | ||
| 152 | adcx r9, rbx | ||
| 153 | |||
| 154 | mulx r11, rbx, [x+16] | ||
| 155 | adcx r10, rbx | ||
| 156 | |||
| 157 | mulx r8, rbx, [x+24] | ||
| 158 | adcx r11, rbx | ||
| 159 | adcx r8, zero | ||
| 160 | |||
| 161 | // Now all the other rows in a uniform pattern | ||
| 162 | |||
| 163 | addrow 1 | ||
| 164 | addrow 2 | ||
| 165 | addrow 3 | ||
| 166 | |||
| 167 | // Now write back the additional columns | ||
| 168 | |||
| 169 | mov [z+32], r8 | ||
| 170 | mov [z+40], r9 | ||
| 171 | mov [z+48], r10 | ||
| 172 | mov [z+56], r11 | ||
| 173 | |||
| 174 | // Restore registers and return | ||
| 175 | |||
| 176 | pop rbx | ||
| 177 | pop rbp | ||
| 178 | |||
| 179 | #if WINDOWS_ABI | ||
| 180 | pop rsi | ||
| 181 | pop rdi | ||
| 182 | #endif | ||
| 183 | ret | ||
| 184 | |||
| 185 | #if defined(__linux__) && defined(__ELF__) | ||
| 186 | .section .note.GNU-stack,"",%progbits | ||
| 187 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S index 70ff69e372..2592d1d658 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: bignum_mul_4_8_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,8 +18,8 @@ | |||
| 16 | // Multiply z := x * y | 18 | // Multiply z := x * y |
| 17 | // Inputs x[4], y[4]; output z[8] | 19 | // Inputs x[4], y[4]; output z[8] |
| 18 | // | 20 | // |
| 19 | // extern void bignum_mul_4_8_alt | 21 | // extern void bignum_mul_4_8_alt(uint64_t z[static 8], const uint64_t x[static 4], |
| 20 | // (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); | 22 | // const uint64_t y[static 4]); |
| 21 | // | 23 | // |
| 22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | 24 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y |
| 23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | 25 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y |
| @@ -72,7 +74,7 @@ | |||
| 72 | adc h, rdx | 74 | adc h, rdx |
| 73 | 75 | ||
| 74 | S2N_BN_SYMBOL(bignum_mul_4_8_alt): | 76 | S2N_BN_SYMBOL(bignum_mul_4_8_alt): |
| 75 | _CET_ENDBR | 77 | _CET_ENDBR |
| 76 | 78 | ||
| 77 | #if WINDOWS_ABI | 79 | #if WINDOWS_ABI |
| 78 | push rdi | 80 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12.S new file mode 100644 index 0000000000..56cbdf06e0 --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12.S | |||
| @@ -0,0 +1,223 @@ | |||
| 1 | // $OpenBSD: bignum_mul_6_12.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
| 2 | // | ||
| 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 4 | // | ||
| 5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 6 | // purpose with or without fee is hereby granted, provided that the above | ||
| 7 | // copyright notice and this permission notice appear in all copies. | ||
| 8 | // | ||
| 9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | |||
| 17 | // ---------------------------------------------------------------------------- | ||
| 18 | // Multiply z := x * y | ||
| 19 | // Inputs x[6], y[6]; output z[12] | ||
| 20 | // | ||
| 21 | // extern void bignum_mul_6_12(uint64_t z[static 12], const uint64_t x[static 6], | ||
| 22 | // const uint64_t y[static 6]); | ||
| 23 | // | ||
| 24 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | ||
| 25 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | ||
| 26 | // ---------------------------------------------------------------------------- | ||
| 27 | |||
| 28 | #include "s2n_bignum_internal.h" | ||
| 29 | |||
| 30 | .intel_syntax noprefix | ||
| 31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_6_12) | ||
| 32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_6_12) | ||
| 33 | .text | ||
| 34 | |||
| 35 | // z and x already arrive in these registers under the standard ABI (the WINDOWS_ABI prologue moves them here) | ||
| 36 | |||
| 37 | #define z rdi | ||
| 38 | #define x rsi | ||
| 39 | |||
| 40 | // y arrives in rdx and is copied to rcx at function entry | ||
| 41 | |||
| 42 | #define y rcx | ||
| 43 | |||
| 44 | // A zero register | ||
| 45 | |||
| 46 | #define zero rbp | ||
| 47 | #define zeroe ebp | ||
| 48 | |||
| 49 | // mulpadd j, i: add x[i] * rdx (rdx is loaded with y[j] by addrow) into the rotating register window r8..r13: the low half goes into register (i+j) mod 6 via adcx (CF chain), the high half into the next register via adox (OF chain); clobbers rax and rbx | ||
| 50 | // The .if ladder below only selects the register pair; it would be nice to have conditional expressions reg[i], reg[i+1] ... | ||
| 51 | |||
| 52 | .macro mulpadd arg1,arg2 | ||
| 53 | mulx rbx, rax, [x+8*\arg2] | ||
| 54 | .if ((\arg1 + \arg2) % 6 == 0) | ||
| 55 | adcx r8, rax | ||
| 56 | adox r9, rbx | ||
| 57 | .elseif ((\arg1 + \arg2) % 6 == 1) | ||
| 58 | adcx r9, rax | ||
| 59 | adox r10, rbx | ||
| 60 | .elseif ((\arg1 + \arg2) % 6 == 2) | ||
| 61 | adcx r10, rax | ||
| 62 | adox r11, rbx | ||
| 63 | .elseif ((\arg1 + \arg2) % 6 == 3) | ||
| 64 | adcx r11, rax | ||
| 65 | adox r12, rbx | ||
| 66 | .elseif ((\arg1 + \arg2) % 6 == 4) | ||
| 67 | adcx r12, rax | ||
| 68 | adox r13, rbx | ||
| 69 | .elseif ((\arg1 + \arg2) % 6 == 5) | ||
| 70 | adcx r13, rax | ||
| 71 | adox r8, rbx | ||
| 72 | .endif | ||
| 73 | |||
| 74 | .endm | ||
| 75 | |||
| 76 | |||
| 77 | // addrow j: add in the whole j'th row x * y[j]. Reloads rdx = y[j], zeroes rbp (the xor also clears CF and OF), stores the word completed by the first mulpadd to z[j], then rebuilds the top window word from the high half of x[5] * y[j] plus the final OF and CF carries | ||
| 78 | |||
| 79 | .macro addrow arg1 | ||
| 80 | mov rdx, [y+8*\arg1] | ||
| 81 | xor zeroe, zeroe | ||
| 82 | |||
| 83 | mulpadd \arg1, 0 | ||
| 84 | |||
| 85 | .if (\arg1 % 6 == 0) | ||
| 86 | mov [z+8*\arg1],r8 | ||
| 87 | .elseif (\arg1 % 6 == 1) | ||
| 88 | mov [z+8*\arg1],r9 | ||
| 89 | .elseif (\arg1 % 6 == 2) | ||
| 90 | mov [z+8*\arg1],r10 | ||
| 91 | .elseif (\arg1 % 6 == 3) | ||
| 92 | mov [z+8*\arg1],r11 | ||
| 93 | .elseif (\arg1 % 6 == 4) | ||
| 94 | mov [z+8*\arg1],r12 | ||
| 95 | .elseif (\arg1 % 6 == 5) | ||
| 96 | mov [z+8*\arg1],r13 | ||
| 97 | .endif | ||
| 98 | |||
| 99 | mulpadd \arg1, 1 | ||
| 100 | mulpadd \arg1, 2 | ||
| 101 | mulpadd \arg1, 3 | ||
| 102 | mulpadd \arg1, 4 | ||
| 103 | |||
| 104 | .if (\arg1 % 6 == 0) | ||
| 105 | mulx r8, rax, [x+40] | ||
| 106 | adcx r13, rax | ||
| 107 | adox r8, zero | ||
| 108 | adcx r8, zero | ||
| 109 | .elseif (\arg1 % 6 == 1) | ||
| 110 | mulx r9, rax, [x+40] | ||
| 111 | adcx r8, rax | ||
| 112 | adox r9, zero | ||
| 113 | adcx r9, zero | ||
| 114 | .elseif (\arg1 % 6 == 2) | ||
| 115 | mulx r10, rax, [x+40] | ||
| 116 | adcx r9, rax | ||
| 117 | adox r10, zero | ||
| 118 | adcx r10, zero | ||
| 119 | .elseif (\arg1 % 6 == 3) | ||
| 120 | mulx r11, rax, [x+40] | ||
| 121 | adcx r10, rax | ||
| 122 | adox r11, zero | ||
| 123 | adcx r11, zero | ||
| 124 | .elseif (\arg1 % 6 == 4) | ||
| 125 | mulx r12, rax, [x+40] | ||
| 126 | adcx r11, rax | ||
| 127 | adox r12, zero | ||
| 128 | adcx r12, zero | ||
| 129 | .elseif (\arg1 % 6 == 5) | ||
| 130 | mulx r13, rax, [x+40] | ||
| 131 | adcx r12, rax | ||
| 132 | adox r13, zero | ||
| 133 | adcx r13, zero | ||
| 134 | .endif | ||
| 135 | |||
| 136 | .endm | ||
| 137 | |||
| 138 | |||
| 139 | |||
| 140 | S2N_BN_SYMBOL(bignum_mul_6_12): | ||
| 141 | _CET_ENDBR | ||
| 142 | |||
| 143 | #if WINDOWS_ABI | ||
| 144 | push rdi | ||
| 145 | push rsi | ||
| 146 | mov rdi, rcx | ||
| 147 | mov rsi, rdx | ||
| 148 | mov rdx, r8 | ||
| 149 | #endif | ||
| 150 | |||
| 151 | // Save rbp, rbx, r12 and r13 so they can be used as scratch; they are restored before ret | ||
| 152 | |||
| 153 | push rbp | ||
| 154 | push rbx | ||
| 155 | push r12 | ||
| 156 | push r13 | ||
| 157 | |||
| 158 | // Copy y from rdx into rcx to start with, freeing rdx to hold the y[j] multiplicand for mulx | ||
| 159 | |||
| 160 | mov y, rdx | ||
| 161 | |||
| 162 | // Zero rbp (the "zero" register), which also makes sure we don't get a fake carry-in | ||
| 163 | |||
| 164 | xor zeroe, zeroe | ||
| 165 | |||
| 166 | // Do the zeroth row, which is a bit different: a single adcx carry chain, no adox | ||
| 167 | // Write back the low word of the zero-zero product and then accumulate | ||
| 168 | // r8,r13,r12,r11,r10,r9 as y[0] * x from 1..6, i.e. word 1 in r9 up to word 6 in r8 | ||
| 169 | |||
| 170 | mov rdx, [y] | ||
| 171 | |||
| 172 | mulx r9, r8, [x] | ||
| 173 | mov [z], r8 | ||
| 174 | |||
| 175 | mulx r10, rbx, [x+8] | ||
| 176 | adcx r9, rbx | ||
| 177 | |||
| 178 | mulx r11, rbx, [x+16] | ||
| 179 | adcx r10, rbx | ||
| 180 | |||
| 181 | mulx r12, rbx, [x+24] | ||
| 182 | adcx r11, rbx | ||
| 183 | |||
| 184 | mulx r13, rbx, [x+32] | ||
| 185 | adcx r12, rbx | ||
| 186 | |||
| 187 | mulx r8, rbx, [x+40] | ||
| 188 | adcx r13, rbx | ||
| 189 | adcx r8, zero | ||
| 190 | |||
| 191 | // Now rows 1..5 in a uniform pattern; each addrow also stores z[j] | ||
| 192 | |||
| 193 | addrow 1 | ||
| 194 | addrow 2 | ||
| 195 | addrow 3 | ||
| 196 | addrow 4 | ||
| 197 | addrow 5 | ||
| 198 | |||
| 199 | // Now write back the additional columns z[6..11] left in the register window | ||
| 200 | |||
| 201 | mov [z+48], r8 | ||
| 202 | mov [z+56], r9 | ||
| 203 | mov [z+64], r10 | ||
| 204 | mov [z+72], r11 | ||
| 205 | mov [z+80], r12 | ||
| 206 | mov [z+88], r13 | ||
| 207 | |||
| 208 | // Restore registers (reverse order of the pushes) and return | ||
| 209 | |||
| 210 | pop r13 | ||
| 211 | pop r12 | ||
| 212 | pop rbx | ||
| 213 | pop rbp | ||
| 214 | |||
| 215 | #if WINDOWS_ABI | ||
| 216 | pop rsi | ||
| 217 | pop rdi | ||
| 218 | #endif | ||
| 219 | ret | ||
| 220 | |||
| 221 | #if defined(__linux__) && defined(__ELF__) | ||
| 222 | .section .note.GNU-stack,"",%progbits | ||
| 223 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12_alt.S new file mode 100644 index 0000000000..077c52b38e --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_6_12_alt.S | |||
| @@ -0,0 +1,199 @@ | |||
| 1 | // $OpenBSD: bignum_mul_6_12_alt.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
| 2 | // | ||
| 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 4 | // | ||
| 5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 6 | // purpose with or without fee is hereby granted, provided that the above | ||
| 7 | // copyright notice and this permission notice appear in all copies. | ||
| 8 | // | ||
| 9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | |||
| 17 | // ---------------------------------------------------------------------------- | ||
| 18 | // Multiply z := x * y | ||
| 19 | // Inputs x[6], y[6]; output z[12] | ||
| 20 | // | ||
| 21 | // extern void bignum_mul_6_12_alt(uint64_t z[static 12], | ||
| 22 | // const uint64_t x[static 6], | ||
| 23 | // const uint64_t y[static 6]); | ||
| 24 | // | ||
| 25 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | ||
| 26 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | ||
| 27 | // ---------------------------------------------------------------------------- | ||
| 28 | |||
| 29 | #include "s2n_bignum_internal.h" | ||
| 30 | |||
| 31 | .intel_syntax noprefix | ||
| 32 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_6_12_alt) | ||
| 33 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_6_12_alt) | ||
| 34 | .text | ||
| 35 | |||
| 36 | // z and x already arrive in these registers under the standard ABI (the WINDOWS_ABI prologue moves them here) | ||
| 37 | |||
| 38 | #define z rdi | ||
| 39 | #define x rsi | ||
| 40 | |||
| 41 | // This is moved from rdx to free it for muls | ||
| 42 | |||
| 43 | #define y rcx | ||
| 44 | |||
| 45 | // Other variables used as a rotating 3-word window to add terms to | ||
| 46 | |||
| 47 | #define t0 r8 | ||
| 48 | #define t1 r9 | ||
| 49 | #define t2 r10 | ||
| 50 | |||
| 51 | // Macro for the key "multiply and add to (c,h,l)" step: adds the 128-bit product numa * numb into the 3-word accumulator (c,h,l); clobbers rax and rdx | ||
| 52 | |||
| 53 | #define combadd(c,h,l,numa,numb) \ | ||
| 54 | mov rax, numa; \ | ||
| 55 | mul QWORD PTR numb; \ | ||
| 56 | add l, rax; \ | ||
| 57 | adc h, rdx; \ | ||
| 58 | adc c, 0 | ||
| 59 | |||
| 60 | // A minutely shorter form for when c = 0 initially: "adc c, c" then simply leaves the carry out of h in c | ||
| 61 | |||
| 62 | #define combadz(c,h,l,numa,numb) \ | ||
| 63 | mov rax, numa; \ | ||
| 64 | mul QWORD PTR numb; \ | ||
| 65 | add l, rax; \ | ||
| 66 | adc h, rdx; \ | ||
| 67 | adc c, c | ||
| 68 | |||
| 69 | // A short form where we don't expect a top carry: only the two words (h,l) are updated and no third word is touched | ||
| 70 | |||
| 71 | #define combads(h,l,numa,numb) \ | ||
| 72 | mov rax, numa; \ | ||
| 73 | mul QWORD PTR numb; \ | ||
| 74 | add l, rax; \ | ||
| 75 | adc h, rdx | ||
| 76 | |||
| 77 | S2N_BN_SYMBOL(bignum_mul_6_12_alt): | ||
| 78 | _CET_ENDBR | ||
| 79 | |||
| 80 | #if WINDOWS_ABI | ||
| 81 | push rdi | ||
| 82 | push rsi | ||
| 83 | mov rdi, rcx | ||
| 84 | mov rsi, rdx | ||
| 85 | mov rdx, r8 | ||
| 86 | #endif | ||
| 87 | |||
| 88 | // Copy y from rdx into rcx to start with, since mul writes the high word of each product to rdx | ||
| 89 | |||
| 90 | mov y, rdx | ||
| 91 | |||
| 92 | // Result term 0: z[0] is the low word of x[0] * y[0]; the high word starts t0 | ||
| 93 | |||
| 94 | mov rax, [x] | ||
| 95 | mul QWORD PTR [y] | ||
| 96 | |||
| 97 | mov [z], rax | ||
| 98 | mov t0, rdx | ||
| 99 | xor t1, t1 | ||
| 100 | |||
| 101 | // Result term 1: add all x[i] * y[j] with i + j = 1, then store z[1] | ||
| 102 | |||
| 103 | xor t2, t2 | ||
| 104 | combads(t1,t0,[x],[y+8]) | ||
| 105 | combadz(t2,t1,t0,[x+8],[y]) | ||
| 106 | mov [z+8], t0 | ||
| 107 | |||
| 108 | // Result term 2: add all x[i] * y[j] with i + j = 2, then store z[2] | ||
| 109 | |||
| 110 | xor t0, t0 | ||
| 111 | combadz(t0,t2,t1,[x],[y+16]) | ||
| 112 | combadd(t0,t2,t1,[x+8],[y+8]) | ||
| 113 | combadd(t0,t2,t1,[x+16],[y]) | ||
| 114 | mov [z+16], t1 | ||
| 115 | |||
| 116 | // Result term 3: add all x[i] * y[j] with i + j = 3, then store z[3] | ||
| 117 | |||
| 118 | xor t1, t1 | ||
| 119 | combadz(t1,t0,t2,[x],[y+24]) | ||
| 120 | combadd(t1,t0,t2,[x+8],[y+16]) | ||
| 121 | combadd(t1,t0,t2,[x+16],[y+8]) | ||
| 122 | combadd(t1,t0,t2,[x+24],[y]) | ||
| 123 | mov [z+24], t2 | ||
| 124 | |||
| 125 | // Result term 4: add all x[i] * y[j] with i + j = 4, then store z[4] | ||
| 126 | |||
| 127 | xor t2, t2 | ||
| 128 | combadz(t2,t1,t0,[x],[y+32]) | ||
| 129 | combadd(t2,t1,t0,[x+8],[y+24]) | ||
| 130 | combadd(t2,t1,t0,[x+16],[y+16]) | ||
| 131 | combadd(t2,t1,t0,[x+24],[y+8]) | ||
| 132 | combadd(t2,t1,t0,[x+32],[y]) | ||
| 133 | mov [z+32], t0 | ||
| 134 | |||
| 135 | // Result term 5: add all x[i] * y[j] with i + j = 5, then store z[5] | ||
| 136 | |||
| 137 | xor t0, t0 | ||
| 138 | combadz(t0,t2,t1,[x],[y+40]) | ||
| 139 | combadd(t0,t2,t1,[x+8],[y+32]) | ||
| 140 | combadd(t0,t2,t1,[x+16],[y+24]) | ||
| 141 | combadd(t0,t2,t1,[x+24],[y+16]) | ||
| 142 | combadd(t0,t2,t1,[x+32],[y+8]) | ||
| 143 | combadd(t0,t2,t1,[x+40],[y]) | ||
| 144 | mov [z+40], t1 | ||
| 145 | |||
| 146 | // Result term 6: add all x[i] * y[j] with i + j = 6 (i from 1 to 5), then store z[6] | ||
| 147 | |||
| 148 | xor t1, t1 | ||
| 149 | combadz(t1,t0,t2,[x+8],[y+40]) | ||
| 150 | combadd(t1,t0,t2,[x+16],[y+32]) | ||
| 151 | combadd(t1,t0,t2,[x+24],[y+24]) | ||
| 152 | combadd(t1,t0,t2,[x+32],[y+16]) | ||
| 153 | combadd(t1,t0,t2,[x+40],[y+8]) | ||
| 154 | mov [z+48], t2 | ||
| 155 | |||
| 156 | // Result term 7: add all x[i] * y[j] with i + j = 7 (i from 2 to 5), then store z[7] | ||
| 157 | |||
| 158 | xor t2, t2 | ||
| 159 | combadz(t2,t1,t0,[x+16],[y+40]) | ||
| 160 | combadd(t2,t1,t0,[x+24],[y+32]) | ||
| 161 | combadd(t2,t1,t0,[x+32],[y+24]) | ||
| 162 | combadd(t2,t1,t0,[x+40],[y+16]) | ||
| 163 | mov [z+56], t0 | ||
| 164 | |||
| 165 | // Result term 8: add all x[i] * y[j] with i + j = 8 (i from 3 to 5), then store z[8] | ||
| 166 | |||
| 167 | xor t0, t0 | ||
| 168 | combadz(t0,t2,t1,[x+24],[y+40]) | ||
| 169 | combadd(t0,t2,t1,[x+32],[y+32]) | ||
| 170 | combadd(t0,t2,t1,[x+40],[y+24]) | ||
| 171 | mov [z+64], t1 | ||
| 172 | |||
| 173 | // Result term 9: add x[4] * y[5] and x[5] * y[4], then store z[9] | ||
| 174 | |||
| 175 | xor t1, t1 | ||
| 176 | combadz(t1,t0,t2,[x+32],[y+40]) | ||
| 177 | combadd(t1,t0,t2,[x+40],[y+32]) | ||
| 178 | mov [z+72], t2 | ||
| 179 | |||
| 180 | // Result term 10: only x[5] * y[5] remains, added with the two-word form (no top carry expected) | ||
| 181 | |||
| 182 | combads(t1,t0,[x+40],[y+40]) | ||
| 183 | mov [z+80], t0 | ||
| 184 | |||
| 185 | // Result term 11: the remaining high word | ||
| 186 | |||
| 187 | mov [z+88], t1 | ||
| 188 | |||
| 189 | // Return (only rdi/rsi need restoring, and only under WINDOWS_ABI) | ||
| 190 | |||
| 191 | #if WINDOWS_ABI | ||
| 192 | pop rsi | ||
| 193 | pop rdi | ||
| 194 | #endif | ||
| 195 | ret | ||
| 196 | |||
| 197 | #if defined(__linux__) && defined(__ELF__) | ||
| 198 | .section .note.GNU-stack,"",%progbits | ||
| 199 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16.S new file mode 100644 index 0000000000..faa0196d8e --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16.S | |||
| @@ -0,0 +1,273 @@ | |||
| 1 | // $OpenBSD: bignum_mul_8_16.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
| 2 | // | ||
| 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 4 | // | ||
| 5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 6 | // purpose with or without fee is hereby granted, provided that the above | ||
| 7 | // copyright notice and this permission notice appear in all copies. | ||
| 8 | // | ||
| 9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | |||
| 17 | // ---------------------------------------------------------------------------- | ||
| 18 | // Multiply z := x * y | ||
| 19 | // Inputs x[8], y[8]; output z[16] | ||
| 20 | // | ||
| 21 | // extern void bignum_mul_8_16(uint64_t z[static 16], const uint64_t x[static 8], | ||
| 22 | // const uint64_t y[static 8]); | ||
| 23 | // | ||
| 24 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | ||
| 25 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | ||
| 26 | // ---------------------------------------------------------------------------- | ||
| 27 | |||
| 28 | #include "s2n_bignum_internal.h" | ||
| 29 | |||
| 30 | .intel_syntax noprefix | ||
| 31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_8_16) | ||
| 32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_8_16) | ||
| 33 | .text | ||
| 34 | |||
| 35 | // z and x already arrive in these registers under the standard ABI (the WINDOWS_ABI prologue moves them here) | ||
| 36 | |||
| 37 | #define z rdi | ||
| 38 | #define x rsi | ||
| 39 | |||
| 40 | // y arrives in rdx and is copied to rcx at function entry | ||
| 41 | |||
| 42 | #define y rcx | ||
| 43 | |||
| 44 | // A zero register | ||
| 45 | |||
| 46 | #define zero rbp | ||
| 47 | #define zeroe ebp | ||
| 48 | |||
| 49 | // mulpadd i, j adds x[i] * rdx (now assumed = y[j]) into the window at i+j: the low half goes into register (i+j) mod 8 of r8..r15 via adcx (CF chain), the high half into the next register via adox (OF chain); clobbers rax and rbx | ||
| 50 | |||
| 51 | .macro mulpadd arg1,arg2 | ||
| 52 | mulx rbx, rax, [x+8*\arg1] | ||
| 53 | .if ((\arg1 + \arg2) % 8 == 0) | ||
| 54 | adcx r8, rax | ||
| 55 | adox r9, rbx | ||
| 56 | .elseif ((\arg1 + \arg2) % 8 == 1) | ||
| 57 | adcx r9, rax | ||
| 58 | adox r10, rbx | ||
| 59 | .elseif ((\arg1 + \arg2) % 8 == 2) | ||
| 60 | adcx r10, rax | ||
| 61 | adox r11, rbx | ||
| 62 | .elseif ((\arg1 + \arg2) % 8 == 3) | ||
| 63 | adcx r11, rax | ||
| 64 | adox r12, rbx | ||
| 65 | .elseif ((\arg1 + \arg2) % 8 == 4) | ||
| 66 | adcx r12, rax | ||
| 67 | adox r13, rbx | ||
| 68 | .elseif ((\arg1 + \arg2) % 8 == 5) | ||
| 69 | adcx r13, rax | ||
| 70 | adox r14, rbx | ||
| 71 | .elseif ((\arg1 + \arg2) % 8 == 6) | ||
| 72 | adcx r14, rax | ||
| 73 | adox r15, rbx | ||
| 74 | .elseif ((\arg1 + \arg2) % 8 == 7) | ||
| 75 | adcx r15, rax | ||
| 76 | adox r8, rbx | ||
| 77 | .endif | ||
| 78 | |||
| 79 | .endm | ||
| 80 | |||
| 81 | // mulpade i, j adds x[i] * rdx (now assumed = y[j]) into the window at i+j | ||
| 82 | // but re-creates the top word assuming nothing to add there: mulx writes the high half straight into the next window register (overwriting it), then adox with the zero register folds the pending OF carry into it; clobbers rax | ||
| 83 | |||
| 84 | .macro mulpade arg1,arg2 | ||
| 85 | .if ((\arg1 + \arg2) % 8 == 0) | ||
| 86 | mulx r9, rax, [x+8*\arg1] | ||
| 87 | adcx r8, rax | ||
| 88 | adox r9, zero | ||
| 89 | .elseif ((\arg1 + \arg2) % 8 == 1) | ||
| 90 | mulx r10, rax, [x+8*\arg1] | ||
| 91 | adcx r9, rax | ||
| 92 | adox r10, zero | ||
| 93 | .elseif ((\arg1 + \arg2) % 8 == 2) | ||
| 94 | mulx r11, rax, [x+8*\arg1] | ||
| 95 | adcx r10, rax | ||
| 96 | adox r11, zero | ||
| 97 | .elseif ((\arg1 + \arg2) % 8 == 3) | ||
| 98 | mulx r12, rax, [x+8*\arg1] | ||
| 99 | adcx r11, rax | ||
| 100 | adox r12, zero | ||
| 101 | .elseif ((\arg1 + \arg2) % 8 == 4) | ||
| 102 | mulx r13, rax, [x+8*\arg1] | ||
| 103 | adcx r12, rax | ||
| 104 | adox r13, zero | ||
| 105 | .elseif ((\arg1 + \arg2) % 8 == 5) | ||
| 106 | mulx r14, rax, [x+8*\arg1] | ||
| 107 | adcx r13, rax | ||
| 108 | adox r14, zero | ||
| 109 | .elseif ((\arg1 + \arg2) % 8 == 6) | ||
| 110 | mulx r15, rax, [x+8*\arg1] | ||
| 111 | adcx r14, rax | ||
| 112 | adox r15, zero | ||
| 113 | .elseif ((\arg1 + \arg2) % 8 == 7) | ||
| 114 | mulx r8, rax, [x+8*\arg1] | ||
| 115 | adcx r15, rax | ||
| 116 | adox r8, zero | ||
| 117 | .endif | ||
| 118 | |||
| 119 | .endm | ||
| 120 | |||
| 121 | // addrow j: add in the whole j'th row x * y[j]. Reloads rdx = y[j], zeroes rbp (the xor also clears CF and OF), stores the word completed by the first mulpadd to z[j], lets mulpade re-create the top window word, and finally folds the remaining CF carry into that top word with adc | ||
| 122 | |||
| 123 | .macro addrow arg1 | ||
| 124 | mov rdx, [y+8*\arg1] | ||
| 125 | xor zeroe, zeroe | ||
| 126 | |||
| 127 | mulpadd 0, \arg1 | ||
| 128 | |||
| 129 | .if (\arg1 % 8 == 0) | ||
| 130 | mov [z+8*\arg1],r8 | ||
| 131 | .elseif (\arg1 % 8 == 1) | ||
| 132 | mov [z+8*\arg1],r9 | ||
| 133 | .elseif (\arg1 % 8 == 2) | ||
| 134 | mov [z+8*\arg1],r10 | ||
| 135 | .elseif (\arg1 % 8 == 3) | ||
| 136 | mov [z+8*\arg1],r11 | ||
| 137 | .elseif (\arg1 % 8 == 4) | ||
| 138 | mov [z+8*\arg1],r12 | ||
| 139 | .elseif (\arg1 % 8 == 5) | ||
| 140 | mov [z+8*\arg1],r13 | ||
| 141 | .elseif (\arg1 % 8 == 6) | ||
| 142 | mov [z+8*\arg1],r14 | ||
| 143 | .elseif (\arg1 % 8 == 7) | ||
| 144 | mov [z+8*\arg1],r15 | ||
| 145 | .endif | ||
| 146 | |||
| 147 | mulpadd 1, \arg1 | ||
| 148 | mulpadd 2, \arg1 | ||
| 149 | mulpadd 3, \arg1 | ||
| 150 | mulpadd 4, \arg1 | ||
| 151 | mulpadd 5, \arg1 | ||
| 152 | mulpadd 6, \arg1 | ||
| 153 | mulpade 7, \arg1 | ||
| 154 | |||
| 155 | .if (\arg1 % 8 == 0) | ||
| 156 | adc r8, zero | ||
| 157 | .elseif (\arg1 % 8 == 1) | ||
| 158 | adc r9, zero | ||
| 159 | .elseif (\arg1 % 8 == 2) | ||
| 160 | adc r10, zero | ||
| 161 | .elseif (\arg1 % 8 == 3) | ||
| 162 | adc r11, zero | ||
| 163 | .elseif (\arg1 % 8 == 4) | ||
| 164 | adc r12, zero | ||
| 165 | .elseif (\arg1 % 8 == 5) | ||
| 166 | adc r13, zero | ||
| 167 | .elseif (\arg1 % 8 == 6) | ||
| 168 | adc r14, zero | ||
| 169 | .elseif (\arg1 % 8 == 7) | ||
| 170 | adc r15, zero | ||
| 171 | .endif | ||
| 172 | |||
| 173 | .endm | ||
| 174 | |||
| 175 | |||
| 176 | S2N_BN_SYMBOL(bignum_mul_8_16): | ||
| 177 | _CET_ENDBR | ||
| 178 | |||
| 179 | #if WINDOWS_ABI | ||
| 180 | push rdi | ||
| 181 | push rsi | ||
| 182 | mov rdi, rcx | ||
| 183 | mov rsi, rdx | ||
| 184 | mov rdx, r8 | ||
| 185 | #endif | ||
| 186 | |||
| 187 | // Save rbp, rbx and r12..r15 so they can be used as scratch; they are restored before ret | ||
| 188 | |||
| 189 | push rbp | ||
| 190 | push rbx | ||
| 191 | push r12 | ||
| 192 | push r13 | ||
| 193 | push r14 | ||
| 194 | push r15 | ||
| 195 | |||
| 196 | // Copy y from rdx into rcx to start with, freeing rdx to hold the y[j] multiplicand for mulx | ||
| 197 | |||
| 198 | mov y, rdx | ||
| 199 | |||
| 200 | // Zero rbp (the "zero" register), which also makes sure we don't get a fake carry-in | ||
| 201 | |||
| 202 | xor zeroe, zeroe | ||
| 203 | |||
| 204 | // Do the zeroth row, which is a bit different: a single plain adc carry chain | ||
| 205 | // Write back the low word of the zero-zero product and then accumulate | ||
| 206 | // r8,r15,r14,r13,r12,r11,r10,r9 as y[0] * x from 1..8, i.e. word 1 in r9 up to word 8 in r8 | ||
| 207 | |||
| 208 | mov rdx, [y] | ||
| 209 | |||
| 210 | mulx r9, r8, [x] | ||
| 211 | mov [z], r8 | ||
| 212 | |||
| 213 | mulx r10, rbx, [x+8] | ||
| 214 | adc r9, rbx | ||
| 215 | |||
| 216 | mulx r11, rbx, [x+16] | ||
| 217 | adc r10, rbx | ||
| 218 | |||
| 219 | mulx r12, rbx, [x+24] | ||
| 220 | adc r11, rbx | ||
| 221 | |||
| 222 | mulx r13, rbx, [x+32] | ||
| 223 | adc r12, rbx | ||
| 224 | |||
| 225 | mulx r14, rbx, [x+40] | ||
| 226 | adc r13, rbx | ||
| 227 | |||
| 228 | mulx r15, rbx, [x+48] | ||
| 229 | adc r14, rbx | ||
| 230 | |||
| 231 | mulx r8, rbx, [x+56] | ||
| 232 | adc r15, rbx | ||
| 233 | adc r8, zero | ||
| 234 | |||
| 235 | // Now rows 1..7 in a uniform pattern; each addrow also stores z[j] | ||
| 236 | |||
| 237 | addrow 1 | ||
| 238 | addrow 2 | ||
| 239 | addrow 3 | ||
| 240 | addrow 4 | ||
| 241 | addrow 5 | ||
| 242 | addrow 6 | ||
| 243 | addrow 7 | ||
| 244 | |||
| 245 | // Now write back the additional columns z[8..15] left in the register window | ||
| 246 | |||
| 247 | mov [z+64], r8 | ||
| 248 | mov [z+72], r9 | ||
| 249 | mov [z+80], r10 | ||
| 250 | mov [z+88], r11 | ||
| 251 | mov [z+96], r12 | ||
| 252 | mov [z+104], r13 | ||
| 253 | mov [z+112], r14 | ||
| 254 | mov [z+120], r15 | ||
| 255 | |||
| 256 | // Restore registers (reverse order of the pushes) and return | ||
| 257 | |||
| 258 | pop r15 | ||
| 259 | pop r14 | ||
| 260 | pop r13 | ||
| 261 | pop r12 | ||
| 262 | pop rbx | ||
| 263 | pop rbp | ||
| 264 | |||
| 265 | #if WINDOWS_ABI | ||
| 266 | pop rsi | ||
| 267 | pop rdi | ||
| 268 | #endif | ||
| 269 | ret | ||
| 270 | |||
| 271 | #if defined(__linux__) && defined(__ELF__) | ||
| 272 | .section .note.GNU-stack,"",%progbits | ||
| 273 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S index 066403b074..0e30b9170f 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: bignum_mul_8_16_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,8 +18,9 @@ | |||
| 16 | // Multiply z := x * y | 18 | // Multiply z := x * y |
| 17 | // Inputs x[8], y[8]; output z[16] | 19 | // Inputs x[8], y[8]; output z[16] |
| 18 | // | 20 | // |
| 19 | // extern void bignum_mul_8_16_alt | 21 | // extern void bignum_mul_8_16_alt(uint64_t z[static 16], |
| 20 | // (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); | 22 | // const uint64_t x[static 8], |
| 23 | // const uint64_t y[static 8]); | ||
| 21 | // | 24 | // |
| 22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | 25 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y |
| 23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | 26 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y |
| @@ -72,7 +75,7 @@ | |||
| 72 | adc h, rdx | 75 | adc h, rdx |
| 73 | 76 | ||
| 74 | S2N_BN_SYMBOL(bignum_mul_8_16_alt): | 77 | S2N_BN_SYMBOL(bignum_mul_8_16_alt): |
| 75 | _CET_ENDBR | 78 | _CET_ENDBR |
| 76 | 79 | ||
| 77 | #if WINDOWS_ABI | 80 | #if WINDOWS_ABI |
| 78 | push rdi | 81 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S index 54e3f59442..86f1af2ac4 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: bignum_sqr.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,8 +18,7 @@ | |||
| 16 | // Square z := x^2 | 18 | // Square z := x^2 |
| 17 | // Input x[n]; output z[k] | 19 | // Input x[n]; output z[k] |
| 18 | // | 20 | // |
| 19 | // extern void bignum_sqr | 21 | // extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x); |
| 20 | // (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); | ||
| 21 | // | 22 | // |
| 22 | // Does the "z := x^2" operation where x is n digits and result z is k. | 23 | // Does the "z := x^2" operation where x is n digits and result z is k. |
| 23 | // Truncates the result in general unless k >= 2 * n | 24 | // Truncates the result in general unless k >= 2 * n |
| @@ -62,7 +63,7 @@ | |||
| 62 | #define llshort ebp | 63 | #define llshort ebp |
| 63 | 64 | ||
| 64 | S2N_BN_SYMBOL(bignum_sqr): | 65 | S2N_BN_SYMBOL(bignum_sqr): |
| 65 | _CET_ENDBR | 66 | _CET_ENDBR |
| 66 | 67 | ||
| 67 | #if WINDOWS_ABI | 68 | #if WINDOWS_ABI |
| 68 | push rdi | 69 | push rdi |
| @@ -86,7 +87,7 @@ S2N_BN_SYMBOL(bignum_sqr): | |||
| 86 | // If p = 0 the result is trivial and nothing needs doing | 87 | // If p = 0 the result is trivial and nothing needs doing |
| 87 | 88 | ||
| 88 | test p, p | 89 | test p, p |
| 89 | jz end | 90 | jz bignum_sqr_end |
| 90 | 91 | ||
| 91 | // initialize (hh,ll) = 0 | 92 | // initialize (hh,ll) = 0 |
| 92 | 93 | ||
| @@ -97,7 +98,7 @@ S2N_BN_SYMBOL(bignum_sqr): | |||
| 97 | 98 | ||
| 98 | xor k, k | 99 | xor k, k |
| 99 | 100 | ||
| 100 | outerloop: | 101 | bignum_sqr_outerloop: |
| 101 | 102 | ||
| 102 | // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n | 103 | // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n |
| 103 | // We want to accumulate all x[i] * x[k - i] for bot <= i < top | 104 | // We want to accumulate all x[i] * x[k - i] for bot <= i < top |
| @@ -122,7 +123,7 @@ outerloop: | |||
| 122 | // If htop <= bot then main doubled part of the sum is empty | 123 | // If htop <= bot then main doubled part of the sum is empty |
| 123 | 124 | ||
| 124 | cmp i, htop | 125 | cmp i, htop |
| 125 | jnc nosumming | 126 | jnc bignum_sqr_nosumming |
| 126 | 127 | ||
| 127 | // Use a moving pointer for [y] = x[k-i] for the cofactor | 128 | // Use a moving pointer for [y] = x[k-i] for the cofactor |
| 128 | 129 | ||
| @@ -132,7 +133,7 @@ outerloop: | |||
| 132 | 133 | ||
| 133 | // Do the main part of the sum x[i] * x[k - i] for 2 * i < k | 134 | // Do the main part of the sum x[i] * x[k - i] for 2 * i < k |
| 134 | 135 | ||
| 135 | innerloop: | 136 | bignum_sqr_innerloop: |
| 136 | mov a, [x+8*i] | 137 | mov a, [x+8*i] |
| 137 | mul QWORD PTR [y] | 138 | mul QWORD PTR [y] |
| 138 | add l, a | 139 | add l, a |
| @@ -141,7 +142,7 @@ innerloop: | |||
| 141 | sub y, 8 | 142 | sub y, 8 |
| 142 | inc i | 143 | inc i |
| 143 | cmp i, htop | 144 | cmp i, htop |
| 144 | jc innerloop | 145 | jc bignum_sqr_innerloop |
| 145 | 146 | ||
| 146 | // Now double it | 147 | // Now double it |
| 147 | 148 | ||
| @@ -151,11 +152,11 @@ innerloop: | |||
| 151 | 152 | ||
| 152 | // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term | 153 | // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term |
| 153 | 154 | ||
| 154 | nosumming: | 155 | bignum_sqr_nosumming: |
| 155 | test k, 1 | 156 | test k, 1 |
| 156 | jnz innerend | 157 | jnz bignum_sqr_innerend |
| 157 | cmp i, n | 158 | cmp i, n |
| 158 | jnc innerend | 159 | jnc bignum_sqr_innerend |
| 159 | 160 | ||
| 160 | mov a, [x+8*i] | 161 | mov a, [x+8*i] |
| 161 | mul a | 162 | mul a |
| @@ -165,7 +166,7 @@ nosumming: | |||
| 165 | 166 | ||
| 166 | // Now add the local sum into the global sum, store and shift | 167 | // Now add the local sum into the global sum, store and shift |
| 167 | 168 | ||
| 168 | innerend: | 169 | bignum_sqr_innerend: |
| 169 | add l, ll | 170 | add l, ll |
| 170 | mov [z+8*k], l | 171 | mov [z+8*k], l |
| 171 | adc h, hh | 172 | adc h, hh |
| @@ -175,11 +176,11 @@ innerend: | |||
| 175 | 176 | ||
| 176 | inc k | 177 | inc k |
| 177 | cmp k, p | 178 | cmp k, p |
| 178 | jc outerloop | 179 | jc bignum_sqr_outerloop |
| 179 | 180 | ||
| 180 | // Restore registers and return | 181 | // Restore registers and return |
| 181 | 182 | ||
| 182 | end: | 183 | bignum_sqr_end: |
| 183 | pop r15 | 184 | pop r15 |
| 184 | pop r14 | 185 | pop r14 |
| 185 | pop r13 | 186 | pop r13 |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8.S new file mode 100644 index 0000000000..25664782f7 --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8.S | |||
| @@ -0,0 +1,158 @@ | |||
| 1 | // $OpenBSD: bignum_sqr_4_8.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
| 2 | // | ||
| 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 4 | // | ||
| 5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 6 | // purpose with or without fee is hereby granted, provided that the above | ||
| 7 | // copyright notice and this permission notice appear in all copies. | ||
| 8 | // | ||
| 9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | |||
| 17 | // ---------------------------------------------------------------------------- | ||
| 18 | // Square, z := x^2 | ||
| 19 | // Input x[4]; output z[8] | ||
| 20 | // | ||
| 21 | // extern void bignum_sqr_4_8(uint64_t z[static 8], const uint64_t x[static 4]); | ||
| 22 | // | ||
| 23 | // Standard x86-64 ABI: RDI = z, RSI = x | ||
| 24 | // Microsoft x64 ABI: RCX = z, RDX = x | ||
| 25 | // ---------------------------------------------------------------------------- | ||
| 26 | |||
| 27 | #include "s2n_bignum_internal.h" | ||
| 28 | |||
| 29 | .intel_syntax noprefix | ||
| 30 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_4_8) | ||
| 31 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_4_8) | ||
| 32 | .text | ||
| 33 | |||
| 34 | // z and x live in rdi and rsi: the standard ABI passes them there, the WINDOWS_ABI shim moves them there | ||
| 35 | |||
| 36 | #define z rdi | ||
| 37 | #define x rsi | ||
| 38 | |||
| 39 | // A register kept at zero, used to flush the adcx/adox carry chains | ||
| 40 | |||
| 41 | #define zero rbp | ||
| 42 | #define zeroe ebp | ||
| 43 | |||
| 44 | // Accumulator window words d1..d6, eventually stored to z[1]..z[6] | ||
| 45 | |||
| 46 | #define d1 r8 | ||
| 47 | #define d2 r9 | ||
| 48 | #define d3 r10 | ||
| 49 | #define d4 r11 | ||
| 50 | #define d5 r12 | ||
| 51 | #define d6 r13 | ||
| 52 | |||
| 53 | |||
| 54 | |||
| 55 | S2N_BN_SYMBOL(bignum_sqr_4_8): | ||
| 56 | _CET_ENDBR | ||
| 57 | |||
| 58 | #if WINDOWS_ABI | ||
| 59 | push rdi | ||
| 60 | push rsi | ||
| 61 | mov rdi, rcx | ||
| 62 | mov rsi, rdx | ||
| 63 | #endif | ||
| 64 | |||
| 65 | // Preserve rbp, r12 and r13 for the caller; they serve below as zero, d5 and d6 | ||
| 66 | |||
| 67 | push rbp | ||
| 68 | push r12 | ||
| 69 | push r13 | ||
| 70 | |||
| 71 | // Set up an initial window [d6;...d1] = [23;03;01], where ij means x[i] * x[j] | ||
| 72 | |||
| 73 | mov rdx, [x] | ||
| 74 | mulx d2, d1, [x+8] | ||
| 75 | mulx d4, d3, [x+24] | ||
| 76 | mov rdx, [x+16] | ||
| 77 | mulx d6, d5, [x+24] | ||
| 78 | |||
| 79 | // Clear our zero register; the xor also clears CF and OF, which adcx and adox chain through | ||
| 80 | |||
| 81 | xor zeroe, zeroe | ||
| 82 | |||
| 83 | // Chain in the addition of 02 + 12 + 13 to that window (no carry-out possible); | ||
| 84 | // rdx still holds x[2] for 02 and 12. This completes the cross terms, ready to double | ||
| 85 | |||
| 86 | mulx rcx, rax, [x] | ||
| 87 | adcx d2, rax | ||
| 88 | adox d3, rcx | ||
| 89 | mulx rcx, rax, [x+8] | ||
| 90 | adcx d3, rax | ||
| 91 | adox d4, rcx | ||
| 92 | mov rdx, [x+24] | ||
| 93 | mulx rcx, rax, [x+8] | ||
| 94 | adcx d4, rax | ||
| 95 | adox d5, rcx | ||
| 96 | adcx d5, zero | ||
| 97 | adox d6, zero | ||
| 98 | adcx d6, zero | ||
| 99 | |||
| 100 | // Strictly this is redundant, since the CF and OF carries were absorbed above. | ||
| 101 | // However it seems helpful for the out-of-order engine to be told it's a fresh start | ||
| 102 | |||
| 103 | xor zeroe, zeroe | ||
| 104 | |||
| 105 | // Double the cross terms and add the squares 00 + 11 + 22 + 33 | ||
| 106 | // | ||
| 107 | // adcx doubles each window word on the CF chain while adox adds the matching | ||
| 108 | // square word on the OF chain. Shift-double would also work; the original | ||
| 109 | // author found this form more efficient in larger squarings but did not | ||
| 110 | // measure this small case. Each store to z comes after the last load of the | ||
| 111 | // x word at the same offset, so things still work if z = x. | ||
| 112 | |||
| 113 | mov rdx, [x] | ||
| 114 | mulx rdx, rax, rdx | ||
| 115 | mov [z], rax | ||
| 116 | adcx d1, d1 | ||
| 117 | adox d1, rdx | ||
| 118 | mov rdx, [x+8] | ||
| 119 | mov [z+8], d1 | ||
| 120 | mulx rdx, rax, rdx | ||
| 121 | adcx d2, d2 | ||
| 122 | adox d2, rax | ||
| 123 | adcx d3, d3 | ||
| 124 | adox d3, rdx | ||
| 125 | mov rdx, [x+16] | ||
| 126 | mov [z+16], d2 | ||
| 127 | mulx rdx, rax, rdx | ||
| 128 | adcx d4, d4 | ||
| 129 | adox d4, rax | ||
| 130 | adcx d5, d5 | ||
| 131 | adox d5, rdx | ||
| 132 | mov rdx, [x+24] | ||
| 133 | mov [z+24], d3 | ||
| 134 | mulx rdx, rax, rdx | ||
| 135 | mov [z+32], d4 | ||
| 136 | adcx d6, d6 | ||
| 137 | mov [z+40], d5 | ||
| 138 | adox d6, rax | ||
| 139 | mov [z+48], d6 | ||
| 140 | adcx rdx, zero | ||
| 141 | adox rdx, zero | ||
| 142 | mov [z+56], rdx | ||
| 143 | |||
| 144 | // Restore the saved registers in reverse order of the pushes and return | ||
| 145 | |||
| 146 | pop r13 | ||
| 147 | pop r12 | ||
| 148 | pop rbp | ||
| 149 | |||
| 150 | #if WINDOWS_ABI | ||
| 151 | pop rsi | ||
| 152 | pop rdi | ||
| 153 | #endif | ||
| 154 | ret | ||
| 155 | |||
| 156 | #if defined(__linux__) && defined(__ELF__) | ||
| 157 | .section .note.GNU-stack,"",%progbits | ||
| 158 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S index 7c534ae907..7eafac3284 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: bignum_sqr_4_8_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,8 +18,8 @@ | |||
| 16 | // Square, z := x^2 | 18 | // Square, z := x^2 |
| 17 | // Input x[4]; output z[8] | 19 | // Input x[4]; output z[8] |
| 18 | // | 20 | // |
| 19 | // extern void bignum_sqr_4_8_alt | 21 | // extern void bignum_sqr_4_8_alt(uint64_t z[static 8], |
| 20 | // (uint64_t z[static 8], uint64_t x[static 4]); | 22 | // const uint64_t x[static 4]); |
| 21 | // | 23 | // |
| 22 | // Standard x86-64 ABI: RDI = z, RSI = x | 24 | // Standard x86-64 ABI: RDI = z, RSI = x |
| 23 | // Microsoft x64 ABI: RCX = z, RDX = x | 25 | // Microsoft x64 ABI: RCX = z, RDX = x |
| @@ -71,7 +73,7 @@ | |||
| 71 | adc c, 0 | 73 | adc c, 0 |
| 72 | 74 | ||
| 73 | S2N_BN_SYMBOL(bignum_sqr_4_8_alt): | 75 | S2N_BN_SYMBOL(bignum_sqr_4_8_alt): |
| 74 | _CET_ENDBR | 76 | _CET_ENDBR |
| 75 | 77 | ||
| 76 | #if WINDOWS_ABI | 78 | #if WINDOWS_ABI |
| 77 | push rdi | 79 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12.S new file mode 100644 index 0000000000..3f055e8b75 --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12.S | |||
| @@ -0,0 +1,227 @@ | |||
| 1 | // $OpenBSD: bignum_sqr_6_12.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
| 2 | // | ||
| 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 4 | // | ||
| 5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 6 | // purpose with or without fee is hereby granted, provided that the above | ||
| 7 | // copyright notice and this permission notice appear in all copies. | ||
| 8 | // | ||
| 9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | |||
| 17 | // ---------------------------------------------------------------------------- | ||
| 18 | // Square, z := x^2 | ||
| 19 | // Input x[6]; output z[12] | ||
| 20 | // | ||
| 21 | // extern void bignum_sqr_6_12(uint64_t z[static 12], const uint64_t x[static 6]); | ||
| 22 | // | ||
| 23 | // Standard x86-64 ABI: RDI = z, RSI = x | ||
| 24 | // Microsoft x64 ABI: RCX = z, RDX = x | ||
| 25 | // ---------------------------------------------------------------------------- | ||
| 26 | |||
| 27 | #include "s2n_bignum_internal.h" | ||
| 28 | |||
| 29 | .intel_syntax noprefix | ||
| 30 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_6_12) | ||
| 31 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_6_12) | ||
| 32 | .text | ||
| 33 | |||
| 34 | // z and x live in rdi and rsi: the standard ABI passes them there, the WINDOWS_ABI shim moves them there | ||
| 35 | |||
| 36 | #define z rdi | ||
| 37 | #define x rsi | ||
| 38 | |||
| 39 | // A register kept at zero, used to flush the adcx/adox carry chains | ||
| 40 | |||
| 41 | #define zero rbp | ||
| 42 | #define zeroe ebp | ||
| 43 | |||
| 44 | // Accumulator window words d1..d9, eventually stored to z[1]..z[9] | ||
| 45 | |||
| 46 | #define d1 r8 | ||
| 47 | #define d2 r9 | ||
| 48 | #define d3 r10 | ||
| 49 | #define d4 r11 | ||
| 50 | #define d5 r12 | ||
| 51 | #define d6 r13 | ||
| 52 | #define d7 r14 | ||
| 53 | #define d8 r15 | ||
| 54 | #define d9 rbx | ||
| 55 | |||
| 56 | // Care is needed: d10 shares rbp with the zero register, so zero is unusable once d10 is loaded | ||
| 57 | |||
| 58 | #define d10 rbp | ||
| 59 | |||
| 60 | |||
| 61 | S2N_BN_SYMBOL(bignum_sqr_6_12): | ||
| 62 | _CET_ENDBR | ||
| 63 | |||
| 64 | #if WINDOWS_ABI | ||
| 65 | push rdi | ||
| 66 | push rsi | ||
| 67 | mov rdi, rcx | ||
| 68 | mov rsi, rdx | ||
| 69 | #endif | ||
| 70 | |||
| 71 | // Preserve rbp, rbx and r12..r15 for the caller (used as zero/d10, d9 and d5..d8) | ||
| 72 | |||
| 73 | push rbp | ||
| 74 | push rbx | ||
| 75 | push r12 | ||
| 76 | push r13 | ||
| 77 | push r14 | ||
| 78 | push r15 | ||
| 79 | |||
| 80 | // Set up an initial window [d8;...d1] = [34;05;03;01], where ij means x[i] * x[j] | ||
| 81 | |||
| 82 | mov rdx, [x] | ||
| 83 | mulx d2, d1, [x+8] | ||
| 84 | mulx d4, d3, [x+24] | ||
| 85 | mulx d6, d5, [x+40] | ||
| 86 | mov rdx, [x+24] | ||
| 87 | mulx d8, d7, [x+32] | ||
| 88 | |||
| 89 | // Clear our zero register; the xor also clears CF and OF, which adcx and adox chain through | ||
| 90 | |||
| 91 | xor zeroe, zeroe | ||
| 92 | |||
| 93 | // Chain in the addition of 02 + 12 + 13 + 14 + 15 to that window | ||
| 94 | // (no carry-out possible since we add it to the top of a product) | ||
| 95 | |||
| 96 | mov rdx, [x+16] | ||
| 97 | mulx rcx, rax, [x] | ||
| 98 | adcx d2, rax | ||
| 99 | adox d3, rcx | ||
| 100 | mulx rcx, rax, [x+8] | ||
| 101 | adcx d3, rax | ||
| 102 | adox d4, rcx | ||
| 103 | mov rdx, [x+8] | ||
| 104 | mulx rcx, rax, [x+24] | ||
| 105 | adcx d4, rax | ||
| 106 | adox d5, rcx | ||
| 107 | mulx rcx, rax, [x+32] | ||
| 108 | adcx d5, rax | ||
| 109 | adox d6, rcx | ||
| 110 | mulx rcx, rax, [x+40] | ||
| 111 | adcx d6, rax | ||
| 112 | adox d7, rcx | ||
| 113 | adcx d7, zero | ||
| 114 | adox d8, zero | ||
| 115 | adcx d8, zero | ||
| 116 | |||
| 117 | // Again zero out the flags. They are already clear at this point, but it may | ||
| 118 | // help the out-of-order engine not to wait for the chain above | ||
| 119 | |||
| 120 | xor zeroe, zeroe | ||
| 121 | |||
| 122 | // Now chain in the 04 + 23 + 24 + 25 + 35 + 45 terms | ||
| 123 | // The last mulx overwrites rbp (zero) as d10, so rax is then set to 0 with mov, which leaves CF/OF intact | ||
| 124 | |||
| 125 | mov rdx, [x+32] | ||
| 126 | mulx rcx, rax, [x] | ||
| 127 | adcx d4, rax | ||
| 128 | adox d5, rcx | ||
| 129 | mov rdx, [x+16] | ||
| 130 | mulx rcx, rax, [x+24] | ||
| 131 | adcx d5, rax | ||
| 132 | adox d6, rcx | ||
| 133 | mulx rcx, rax, [x+32] | ||
| 134 | adcx d6, rax | ||
| 135 | adox d7, rcx | ||
| 136 | mulx rcx, rax, [x+40] | ||
| 137 | adcx d7, rax | ||
| 138 | adox d8, rcx | ||
| 139 | mov rdx, [x+24] | ||
| 140 | mulx d9, rax, [x+40] | ||
| 141 | adcx d8, rax | ||
| 142 | adox d9, zero | ||
| 143 | mov rdx, [x+32] | ||
| 144 | mulx d10, rax, [x+40] | ||
| 145 | adcx d9, rax | ||
| 146 | mov eax, 0 | ||
| 147 | adox d10, rax | ||
| 148 | adcx d10, rax | ||
| 149 | |||
| 150 | // Again, just for a clear fresh start for the flags (rax itself is overwritten by the next mulx) | ||
| 151 | |||
| 152 | xor eax, eax | ||
| 153 | |||
| 154 | // Double the cross terms and add the squares 00 + 11 + 22 + 33 + 44 + 55 | ||
| 155 | // | ||
| 156 | // adcx doubles each window word on the CF chain while adox adds the matching | ||
| 157 | // square word on the OF chain. Shift-double would also work; the original | ||
| 158 | // author found this form more efficient in larger squarings but did not | ||
| 159 | // measure this small case. Each store to z comes after the last load of the | ||
| 160 | // x word at the same offset, so things still work if z = x. | ||
| 161 | |||
| 162 | mov rdx, [x] | ||
| 163 | mulx rdx, rax, rdx | ||
| 164 | mov [z], rax | ||
| 165 | adcx d1, d1 | ||
| 166 | adox d1, rdx | ||
| 167 | mov rdx, [x+8] | ||
| 168 | mov [z+8], d1 | ||
| 169 | mulx rdx, rax, rdx | ||
| 170 | adcx d2, d2 | ||
| 171 | adox d2, rax | ||
| 172 | adcx d3, d3 | ||
| 173 | adox d3, rdx | ||
| 174 | mov rdx, [x+16] | ||
| 175 | mov [z+16], d2 | ||
| 176 | mulx rdx, rax, rdx | ||
| 177 | adcx d4, d4 | ||
| 178 | adox d4, rax | ||
| 179 | adcx d5, d5 | ||
| 180 | adox d5, rdx | ||
| 181 | mov rdx, [x+24] | ||
| 182 | mov [z+24], d3 | ||
| 183 | mulx rdx, rax, rdx | ||
| 184 | adcx d6, d6 | ||
| 185 | adox d6, rax | ||
| 186 | adcx d7, d7 | ||
| 187 | adox d7, rdx | ||
| 188 | mov rdx, [x+32] | ||
| 189 | mov [z+32], d4 | ||
| 190 | mulx rdx, rax, rdx | ||
| 191 | adcx d8, d8 | ||
| 192 | adox d8, rax | ||
| 193 | adcx d9, d9 | ||
| 194 | adox d9, rdx | ||
| 195 | mov rdx, [x+40] | ||
| 196 | mov [z+40], d5 | ||
| 197 | mulx rdx, rax, rdx | ||
| 198 | mov [z+48], d6 | ||
| 199 | adcx d10, d10 | ||
| 200 | mov [z+56], d7 | ||
| 201 | adox d10, rax | ||
| 202 | mov [z+64], d8 | ||
| 203 | mov eax, 0 | ||
| 204 | mov [z+72], d9 | ||
| 205 | adcx rdx, rax | ||
| 206 | mov [z+80], d10 | ||
| 207 | adox rdx, rax | ||
| 208 | mov [z+88], rdx | ||
| 209 | |||
| 210 | // Restore the saved registers in reverse order of the pushes and return | ||
| 211 | |||
| 212 | pop r15 | ||
| 213 | pop r14 | ||
| 214 | pop r13 | ||
| 215 | pop r12 | ||
| 216 | pop rbx | ||
| 217 | pop rbp | ||
| 218 | |||
| 219 | #if WINDOWS_ABI | ||
| 220 | pop rsi | ||
| 221 | pop rdi | ||
| 222 | #endif | ||
| 223 | ret | ||
| 224 | |||
| 225 | #if defined(__linux__) && defined(__ELF__) | ||
| 226 | .section .note.GNU-stack,"",%progbits | ||
| 227 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12_alt.S new file mode 100644 index 0000000000..eb43b0a15b --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_6_12_alt.S | |||
| @@ -0,0 +1,210 @@ | |||
| 1 | // $OpenBSD: bignum_sqr_6_12_alt.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
| 2 | // | ||
| 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 4 | // | ||
| 5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 6 | // purpose with or without fee is hereby granted, provided that the above | ||
| 7 | // copyright notice and this permission notice appear in all copies. | ||
| 8 | // | ||
| 9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | |||
| 17 | // ---------------------------------------------------------------------------- | ||
| 18 | // Square, z := x^2 | ||
| 19 | // Input x[6]; output z[12] | ||
| 20 | // | ||
| 21 | // extern void bignum_sqr_6_12_alt(uint64_t z[static 12], | ||
| 22 | // const uint64_t x[static 6]); | ||
| 23 | // | ||
| 24 | // Standard x86-64 ABI: RDI = z, RSI = x | ||
| 25 | // Microsoft x64 ABI: RCX = z, RDX = x | ||
| 26 | // ---------------------------------------------------------------------------- | ||
| 27 | |||
| 28 | #include "s2n_bignum_internal.h" | ||
| 29 | |||
| 30 | .intel_syntax noprefix | ||
| 31 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_6_12_alt) | ||
| 32 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_6_12_alt) | ||
| 33 | .text | ||
| 34 | |||
| 35 | // Input arguments (in rdi and rsi, after the optional WINDOWS_ABI shim) | ||
| 36 | |||
| 37 | #define z rdi | ||
| 38 | #define x rsi | ||
| 39 | |||
| 40 | // Rotating 3-word accumulator window; the roles (c,h,l) cycle through t0, t1, t2 from one result word to the next | ||
| 41 | |||
| 42 | #define t0 r8 | ||
| 43 | #define t1 r9 | ||
| 44 | #define t2 r10 | ||
| 45 | |||
| 46 | // Additional temporaries: a local (u1,u0) window sums cross products so they share a single doubling | ||
| 47 | |||
| 48 | #define u0 rcx | ||
| 49 | #define u1 r11 | ||
| 50 | |||
| 51 | // Macro for the key "multiply and add to (c,h,l)" step: (c,h,l) += numa * numb | ||
| 52 | |||
| 53 | #define combadd(c,h,l,numa,numb) \ | ||
| 54 | mov rax, numa; \ | ||
| 55 | mul QWORD PTR numb; \ | ||
| 56 | add l, rax; \ | ||
| 57 | adc h, rdx; \ | ||
| 58 | adc c, 0 | ||
| 59 | |||
| 60 | // Set up initial window (c,h,l) = numa * numb, with the top word c zeroed | ||
| 61 | |||
| 62 | #define combaddz(c,h,l,numa,numb) \ | ||
| 63 | mov rax, numa; \ | ||
| 64 | mul QWORD PTR numb; \ | ||
| 65 | xor c, c; \ | ||
| 66 | mov l, rax; \ | ||
| 67 | mov h, rdx | ||
| 68 | |||
| 69 | // Doubling step (c,h,l) = 2 * (c,hh,ll) + (0,h,l); note that hh and ll are doubled in place | ||
| 70 | |||
| 71 | #define doubladd(c,h,l,hh,ll) \ | ||
| 72 | add ll, ll; \ | ||
| 73 | adc hh, hh; \ | ||
| 74 | adc c, c; \ | ||
| 75 | add l, ll; \ | ||
| 76 | adc h, hh; \ | ||
| 77 | adc c, 0 | ||
| 78 | |||
| 79 | // Square term incorporation (c,h,l) += numa^2 | ||
| 80 | |||
| 81 | #define combadd1(c,h,l,numa) \ | ||
| 82 | mov rax, numa; \ | ||
| 83 | mul rax; \ | ||
| 84 | add l, rax; \ | ||
| 85 | adc h, rdx; \ | ||
| 86 | adc c, 0 | ||
| 87 | |||
| 88 | // A short form of the square step, (h,l) += numa^2, where we don't expect a top carry | ||
| 89 | |||
| 90 | #define combads(h,l,numa) \ | ||
| 91 | mov rax, numa; \ | ||
| 92 | mul rax; \ | ||
| 93 | add l, rax; \ | ||
| 94 | adc h, rdx | ||
| 95 | |||
| 96 | // A version doubling directly before adding, for single non-square terms: (c,h,l) += 2 * numa * numb | ||
| 97 | |||
| 98 | #define combadd2(c,h,l,numa,numb) \ | ||
| 99 | mov rax, numa; \ | ||
| 100 | mul QWORD PTR numb; \ | ||
| 101 | add rax, rax; \ | ||
| 102 | adc rdx, rdx; \ | ||
| 103 | adc c, 0; \ | ||
| 104 | add l, rax; \ | ||
| 105 | adc h, rdx; \ | ||
| 106 | adc c, 0 | ||
| 107 | |||
| 108 | S2N_BN_SYMBOL(bignum_sqr_6_12_alt): | ||
| 109 | _CET_ENDBR | ||
| 110 | |||
| 111 | #if WINDOWS_ABI | ||
| 112 | push rdi | ||
| 113 | push rsi | ||
| 114 | mov rdi, rcx | ||
| 115 | mov rsi, rdx | ||
| 116 | #endif | ||
| 117 | |||
| 118 | // Result term 0: z[0] is the low word of x[0]^2; its high word seeds the window in t0 | ||
| 119 | |||
| 120 | mov rax, [x] | ||
| 121 | mul rax | ||
| 122 | |||
| 123 | mov [z], rax | ||
| 124 | mov t0, rdx | ||
| 125 | xor t1, t1 | ||
| 126 | |||
| 127 | // Result term 1: add 2 * x[0]*x[1] | ||
| 128 | |||
| 129 | xor t2, t2 | ||
| 130 | combadd2(t2,t1,t0,[x],[x+8]) | ||
| 131 | mov [z+8], t0 | ||
| 132 | |||
| 133 | // Result term 2: add x[1]^2 + 2 * x[0]*x[2] | ||
| 134 | |||
| 135 | xor t0, t0 | ||
| 136 | combadd1(t0,t2,t1,[x+8]) | ||
| 137 | combadd2(t0,t2,t1,[x],[x+16]) | ||
| 138 | mov [z+16], t1 | ||
| 139 | |||
| 140 | // Result term 3: add 2 * (x[0]*x[3] + x[1]*x[2]), summed in (t1,u1,u0) before doubling | ||
| 141 | |||
| 142 | combaddz(t1,u1,u0,[x],[x+24]) | ||
| 143 | combadd(t1,u1,u0,[x+8],[x+16]) | ||
| 144 | doubladd(t1,t0,t2,u1,u0) | ||
| 145 | mov [z+24], t2 | ||
| 146 | |||
| 147 | // Result term 4: add 2 * (x[0]*x[4] + x[1]*x[3]) + x[2]^2 | ||
| 148 | |||
| 149 | combaddz(t2,u1,u0,[x],[x+32]) | ||
| 150 | combadd(t2,u1,u0,[x+8],[x+24]) | ||
| 151 | doubladd(t2,t1,t0,u1,u0) | ||
| 152 | combadd1(t2,t1,t0,[x+16]) | ||
| 153 | mov [z+32], t0 | ||
| 154 | |||
| 155 | // Result term 5: add 2 * (x[0]*x[5] + x[1]*x[4] + x[2]*x[3]) | ||
| 156 | |||
| 157 | combaddz(t0,u1,u0,[x],[x+40]) | ||
| 158 | combadd(t0,u1,u0,[x+8],[x+32]) | ||
| 159 | combadd(t0,u1,u0,[x+16],[x+24]) | ||
| 160 | doubladd(t0,t2,t1,u1,u0) | ||
| 161 | mov [z+40], t1 | ||
| 162 | |||
| 163 | // Result term 6: add 2 * (x[1]*x[5] + x[2]*x[4]) + x[3]^2 | ||
| 164 | |||
| 165 | combaddz(t1,u1,u0,[x+8],[x+40]) | ||
| 166 | combadd(t1,u1,u0,[x+16],[x+32]) | ||
| 167 | doubladd(t1,t0,t2,u1,u0) | ||
| 168 | combadd1(t1,t0,t2,[x+24]) | ||
| 169 | mov [z+48], t2 | ||
| 170 | |||
| 171 | // Result term 7: add 2 * (x[2]*x[5] + x[3]*x[4]) | ||
| 172 | |||
| 173 | combaddz(t2,u1,u0,[x+16],[x+40]) | ||
| 174 | combadd(t2,u1,u0,[x+24],[x+32]) | ||
| 175 | doubladd(t2,t1,t0,u1,u0) | ||
| 176 | mov [z+56], t0 | ||
| 177 | |||
| 178 | // Result term 8: add 2 * x[3]*x[5] + x[4]^2 | ||
| 179 | |||
| 180 | xor t0, t0 | ||
| 181 | combadd2(t0,t2,t1,[x+24],[x+40]) | ||
| 182 | combadd1(t0,t2,t1,[x+32]) | ||
| 183 | mov [z+64], t1 | ||
| 184 | |||
| 185 | // Result term 9: add 2 * x[4]*x[5] | ||
| 186 | |||
| 187 | xor t1, t1 | ||
| 188 | combadd2(t1,t0,t2,[x+32],[x+40]) | ||
| 189 | mov [z+72], t2 | ||
| 190 | |||
| 191 | // Result term 10: add x[5]^2 (combads: no carry word is tracked beyond t1) | ||
| 192 | |||
| 193 | combads(t1,t0,[x+40]) | ||
| 194 | mov [z+80], t0 | ||
| 195 | |||
| 196 | // Result term 11: the remaining top word of the window | ||
| 197 | |||
| 198 | mov [z+88], t1 | ||
| 199 | |||
| 200 | // Return (only rdi/rsi need restoring, and only under WINDOWS_ABI) | ||
| 201 | |||
| 202 | #if WINDOWS_ABI | ||
| 203 | pop rsi | ||
| 204 | pop rdi | ||
| 205 | #endif | ||
| 206 | ret | ||
| 207 | |||
| 208 | #if defined(__linux__) && defined(__ELF__) | ||
| 209 | .section .note.GNU-stack,"",%progbits | ||
| 210 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16.S new file mode 100644 index 0000000000..41277b5b6a --- /dev/null +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16.S | |||
| @@ -0,0 +1,311 @@ | |||
| 1 | // $OpenBSD: bignum_sqr_8_16.S,v 1.4 2025/08/12 10:23:40 jsing Exp $ | ||
| 2 | // | ||
| 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 4 | // | ||
| 5 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 6 | // purpose with or without fee is hereby granted, provided that the above | ||
| 7 | // copyright notice and this permission notice appear in all copies. | ||
| 8 | // | ||
| 9 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | |||
| 17 | // ---------------------------------------------------------------------------- | ||
| 18 | // Square, z := x^2 | ||
| 19 | // Input x[8]; output z[16] | ||
| 20 | // | ||
| 21 | // extern void bignum_sqr_8_16(uint64_t z[static 16], const uint64_t x[static 8]); | ||
| 22 | // | ||
| 23 | // Standard x86-64 ABI: RDI = z, RSI = x | ||
| 24 | // Microsoft x64 ABI: RCX = z, RDX = x | ||
| 25 | // ---------------------------------------------------------------------------- | ||
| 26 | |||
| 27 | #include "s2n_bignum_internal.h" | ||
| 28 | |||
| 29 | .intel_syntax noprefix | ||
| 30 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_8_16) | ||
| 31 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_8_16) | ||
| 32 | .text | ||
| 33 | |||
| 34 | // z and x live in rdi and rsi: the standard ABI passes them there, the WINDOWS_ABI shim moves them there | ||
| 35 | |||
| 36 | #define z rdi | ||
| 37 | #define x rsi | ||
| 38 | |||
| 39 | // A register kept at zero, used to flush the adcx/adox carry chains | ||
| 40 | |||
| 41 | #define zero rbp | ||
| 42 | #define zeroe ebp | ||
| 43 | |||
| 44 | // mulpadd i, j adds rdx * x[i] at window point k = (i+j) mod 8: low word into r8+k (adcx), high word into the next register, wrapping to r8 (adox) | ||
| 45 | |||
| 46 | .macro mulpadd arg1,arg2 | ||
| 47 | mulx rcx, rax, [x+8*\arg1] | ||
| 48 | .if ((\arg1 + \arg2) % 8 == 0) | ||
| 49 | adcx r8, rax | ||
| 50 | adox r9, rcx | ||
| 51 | .elseif ((\arg1 + \arg2) % 8 == 1) | ||
| 52 | adcx r9, rax | ||
| 53 | adox r10, rcx | ||
| 54 | .elseif ((\arg1 + \arg2) % 8 == 2) | ||
| 55 | adcx r10, rax | ||
| 56 | adox r11, rcx | ||
| 57 | .elseif ((\arg1 + \arg2) % 8 == 3) | ||
| 58 | adcx r11, rax | ||
| 59 | adox r12, rcx | ||
| 60 | .elseif ((\arg1 + \arg2) % 8 == 4) | ||
| 61 | adcx r12, rax | ||
| 62 | adox r13, rcx | ||
| 63 | .elseif ((\arg1 + \arg2) % 8 == 5) | ||
| 64 | adcx r13, rax | ||
| 65 | adox r14, rcx | ||
| 66 | .elseif ((\arg1 + \arg2) % 8 == 6) | ||
| 67 | adcx r14, rax | ||
| 68 | adox r15, rcx | ||
| 69 | .elseif ((\arg1 + \arg2) % 8 == 7) | ||
| 70 | adcx r15, rax | ||
| 71 | adox r8, rcx | ||
| 72 | .endif | ||
| 73 | |||
| 74 | .endm | ||
| 75 | |||
| 76 | // mulpade i, j adds the low word of rdx * x[i] into the window at (i+j) mod 8 via adcx, | ||
| 77 | // but the high word overwrites the next window register (nothing to add there yet) before adox adds the OF carry | ||
| 78 | |||
| 79 | .macro mulpade arg1,arg2 | ||
| 80 | .if ((\arg1 + \arg2) % 8 == 0) | ||
| 81 | mulx r9, rax, [x+8*\arg1] | ||
| 82 | adcx r8, rax | ||
| 83 | adox r9, zero | ||
| 84 | .elseif ((\arg1 + \arg2) % 8 == 1) | ||
| 85 | mulx r10, rax, [x+8*\arg1] | ||
| 86 | adcx r9, rax | ||
| 87 | adox r10, zero | ||
| 88 | .elseif ((\arg1 + \arg2) % 8 == 2) | ||
| 89 | mulx r11, rax, [x+8*\arg1] | ||
| 90 | adcx r10, rax | ||
| 91 | adox r11, zero | ||
| 92 | .elseif ((\arg1 + \arg2) % 8 == 3) | ||
| 93 | mulx r12, rax, [x+8*\arg1] | ||
| 94 | adcx r11, rax | ||
| 95 | adox r12, zero | ||
| 96 | .elseif ((\arg1 + \arg2) % 8 == 4) | ||
| 97 | mulx r13, rax, [x+8*\arg1] | ||
| 98 | adcx r12, rax | ||
| 99 | adox r13, zero | ||
| 100 | .elseif ((\arg1 + \arg2) % 8 == 5) | ||
| 101 | mulx r14, rax, [x+8*\arg1] | ||
| 102 | adcx r13, rax | ||
| 103 | adox r14, zero | ||
| 104 | .elseif ((\arg1 + \arg2) % 8 == 6) | ||
| 105 | mulx r15, rax, [x+8*\arg1] | ||
| 106 | adcx r14, rax | ||
| 107 | adox r15, zero | ||
| 108 | .elseif ((\arg1 + \arg2) % 8 == 7) | ||
| 109 | mulx r8, rax, [x+8*\arg1] | ||
| 110 | adcx r15, rax | ||
| 111 | adox r8, zero | ||
| 112 | .endif | ||
| 113 | |||
| 114 | .endm | ||
| 115 | |||
| 116 | .macro diagonals | ||
| 117 | |||
| 118 | xor zeroe, zeroe | ||
| 119 | |||
| 120 | // Set initial window = 10 + 20 + 30 + 40 + 50 + 60 + 70 (ij means x[i] * x[j]); r9, r10 are written back to z[1], z[2], leaving r11..r15, r8 | ||
| 121 | |||
| 122 | mov rdx, [x] | ||
| 123 | mulx rax, r9, [x+8] | ||
| 124 | mov [z+8], r9 | ||
| 125 | mulx rcx, r10, [x+16] | ||
| 126 | adcx r10, rax | ||
| 127 | mov [z+16], r10 | ||
| 128 | mulx rax, r11, [x+24] | ||
| 129 | adcx r11, rcx | ||
| 130 | mulx rcx, r12, [x+32] | ||
| 131 | adcx r12, rax | ||
| 132 | mulx rax, r13, [x+40] | ||
| 133 | adcx r13, rcx | ||
| 134 | mulx rcx, r14, [x+48] | ||
| 135 | adcx r14, rax | ||
| 136 | mulx r8, r15, [x+56] | ||
| 137 | adcx r15, rcx | ||
| 138 | adcx r8, zero | ||
| 139 | |||
| 140 | // Add in the next diagonal = 21 + 31 + 41 + 51 + 61 + 71 + 54; r11, r12 are written back to z[3], z[4] | ||
| 141 | |||
| 142 | xor zeroe, zeroe | ||
| 143 | mov rdx, [x+8] | ||
| 144 | mulpadd 2, 1 | ||
| 145 | mov [z+24], r11 | ||
| 146 | mulpadd 3, 1 | ||
| 147 | mov [z+32], r12 | ||
| 148 | mulpadd 4, 1 | ||
| 149 | mulpadd 5, 1 | ||
| 150 | mulpadd 6, 1 | ||
| 151 | mulpade 7, 1 | ||
| 152 | mov rdx, [x+32] | ||
| 153 | mulpade 5, 4 | ||
| 154 | adcx r10, zero | ||
| 155 | |||
| 156 | // And the next one = 32 + 42 + 52 + 62 + 72 + 64 + 65; r13, r14 are written back to z[5], z[6] | ||
| 157 | |||
| 158 | xor zeroe, zeroe | ||
| 159 | mov rdx, [x+16] | ||
| 160 | mulpadd 3, 2 | ||
| 161 | mov [z+40], r13 | ||
| 162 | mulpadd 4, 2 | ||
| 163 | mov [z+48], r14 | ||
| 164 | mulpadd 5, 2 | ||
| 165 | mulpadd 6, 2 | ||
| 166 | mulpadd 7, 2 | ||
| 167 | mov rdx, [x+48] | ||
| 168 | mulpade 4, 6 | ||
| 169 | mulpade 5, 6 | ||
| 170 | adcx r12, zero | ||
| 171 | |||
| 172 | // And the final one = 43 + 53 + 63 + 73 + 74 + 75 + 76; r15, r8 are written back to z[7], z[8] | ||
| 173 | |||
| 174 | xor zeroe, zeroe | ||
| 175 | mov rdx, [x+24] | ||
| 176 | mulpadd 4, 3 | ||
| 177 | mov [z+56], r15 | ||
| 178 | mulpadd 5, 3 | ||
| 179 | mov [z+64], r8 | ||
| 180 | mulpadd 6, 3 | ||
| 181 | mulpadd 7, 3 | ||
| 182 | mov rdx, [x+56] | ||
| 183 | mulpadd 4, 7 | ||
| 184 | mulpade 5, 7 | ||
| 185 | mulpade 6, 7 | ||
| 186 | adcx r14, zero | ||
| 187 | |||
| 188 | // Double and add the squares; reload z[1]..z[8] from memory and thereafter use the registers | ||
| 189 | // r9..r14, which haven't been written back yet (r15 is reused for the high word of x[7]^2) | ||
| 190 | |||
| 191 | xor zeroe, zeroe | ||
| 192 | mov rdx, [x] | ||
| 193 | mulx rcx, rax, rdx | ||
| 194 | mov [z], rax | ||
| 195 | mov rax, [z+8] | ||
| 196 | adcx rax, rax | ||
| 197 | adox rax, rcx | ||
| 198 | mov [z+8], rax | ||
| 199 | |||
| 200 | mov rax, [z+16] | ||
| 201 | mov rdx, [x+8] | ||
| 202 | mulx rcx, rdx, rdx | ||
| 203 | adcx rax, rax | ||
| 204 | adox rax, rdx | ||
| 205 | mov [z+16], rax | ||
| 206 | mov rax, [z+24] | ||
| 207 | adcx rax, rax | ||
| 208 | adox rax, rcx | ||
| 209 | mov [z+24], rax | ||
| 210 | |||
| 211 | mov rax, [z+32] | ||
| 212 | mov rdx, [x+16] | ||
| 213 | mulx rcx, rdx, rdx | ||
| 214 | adcx rax, rax | ||
| 215 | adox rax, rdx | ||
| 216 | mov [z+32], rax | ||
| 217 | mov rax, [z+40] | ||
| 218 | adcx rax, rax | ||
| 219 | adox rax, rcx | ||
| 220 | mov [z+40], rax | ||
| 221 | |||
| 222 | mov rax, [z+48] | ||
| 223 | mov rdx, [x+24] | ||
| 224 | mulx rcx, rdx, rdx | ||
| 225 | adcx rax, rax | ||
| 226 | adox rax, rdx | ||
| 227 | mov [z+48], rax | ||
| 228 | mov rax, [z+56] | ||
| 229 | adcx rax, rax | ||
| 230 | adox rax, rcx | ||
| 231 | mov [z+56], rax | ||
| 232 | |||
| 233 | mov rax, [z+64] | ||
| 234 | mov rdx, [x+32] | ||
| 235 | mulx rcx, rdx, rdx | ||
| 236 | adcx rax, rax | ||
| 237 | adox rax, rdx | ||
| 238 | mov [z+64], rax | ||
| 239 | adcx r9, r9 | ||
| 240 | adox r9, rcx | ||
| 241 | mov [z+72], r9 | ||
| 242 | |||
| 243 | mov rdx, [x+40] | ||
| 244 | mulx rcx, rdx, rdx | ||
| 245 | adcx r10, r10 | ||
| 246 | adox r10, rdx | ||
| 247 | mov [z+80], r10 | ||
| 248 | adcx r11, r11 | ||
| 249 | adox r11, rcx | ||
| 250 | mov [z+88], r11 | ||
| 251 | |||
| 252 | mov rdx, [x+48] | ||
| 253 | mulx rcx, rdx, rdx | ||
| 254 | adcx r12, r12 | ||
| 255 | adox r12, rdx | ||
| 256 | mov [z+96], r12 | ||
| 257 | adcx r13, r13 | ||
| 258 | adox r13, rcx | ||
| 259 | mov [z+104], r13 | ||
| 260 | |||
| 261 | mov rdx, [x+56] | ||
| 262 | mulx r15, rdx, rdx | ||
| 263 | adcx r14, r14 | ||
| 264 | adox r14, rdx | ||
| 265 | mov [z+112], r14 | ||
| 266 | adcx r15, zero | ||
| 267 | adox r15, zero | ||
| 268 | mov [z+120], r15 | ||
| 269 | |||
| 270 | .endm | ||
| 271 | |||
| 272 | |||
| 273 | S2N_BN_SYMBOL(bignum_sqr_8_16): | ||
| 274 | _CET_ENDBR | ||
| 275 | |||
| 276 | #if WINDOWS_ABI | ||
| 277 | push rdi | ||
| 278 | push rsi | ||
| 279 | mov rdi, rcx | ||
| 280 | mov rsi, rdx | ||
| 281 | #endif | ||
| 282 | |||
| 283 | // Preserve rbp and r12..r15 for the caller (used below as zero and as window words) | ||
| 284 | |||
| 285 | push rbp | ||
| 286 | push r12 | ||
| 287 | push r13 | ||
| 288 | push r14 | ||
| 289 | push r15 | ||
| 290 | |||
| 291 | // Do the squaring (diagonals macro). NOTE(review): it stores to z before all of x is read, so z = x looks unsupported - confirm | ||
| 292 | |||
| 293 | diagonals | ||
| 294 | |||
| 295 | // Epilog: restore the saved registers in reverse order of the pushes and return | ||
| 296 | |||
| 297 | pop r15 | ||
| 298 | pop r14 | ||
| 299 | pop r13 | ||
| 300 | pop r12 | ||
| 301 | pop rbp | ||
| 302 | |||
| 303 | #if WINDOWS_ABI | ||
| 304 | pop rsi | ||
| 305 | pop rdi | ||
| 306 | #endif | ||
| 307 | ret | ||
| 308 | |||
| 309 | #if defined(__linux__) && defined(__ELF__) | ||
| 310 | .section .note.GNU-stack,"",%progbits | ||
| 311 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S index ac0b6f96c2..cb10ba2a12 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: bignum_sqr_8_16_alt.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,7 +18,8 @@ | |||
| 16 | // Square, z := x^2 | 18 | // Square, z := x^2 |
| 17 | // Input x[8]; output z[16] | 19 | // Input x[8]; output z[16] |
| 18 | // | 20 | // |
| 19 | // extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); | 21 | // extern void bignum_sqr_8_16_alt(uint64_t z[static 16], |
| 22 | // const uint64_t x[static 8]); | ||
| 20 | // | 23 | // |
| 21 | // Standard x86-64 ABI: RDI = z, RSI = x | 24 | // Standard x86-64 ABI: RDI = z, RSI = x |
| 22 | // Microsoft x64 ABI: RCX = z, RDX = x | 25 | // Microsoft x64 ABI: RCX = z, RDX = x |
| @@ -103,7 +106,7 @@ | |||
| 103 | adc c, 0 | 106 | adc c, 0 |
| 104 | 107 | ||
| 105 | S2N_BN_SYMBOL(bignum_sqr_8_16_alt): | 108 | S2N_BN_SYMBOL(bignum_sqr_8_16_alt): |
| 106 | _CET_ENDBR | 109 | _CET_ENDBR |
| 107 | 110 | ||
| 108 | #if WINDOWS_ABI | 111 | #if WINDOWS_ABI |
| 109 | push rdi | 112 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S index 3ff8a30510..7324d3a71e 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: bignum_sub.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,9 +18,8 @@ | |||
| 16 | // Subtract, z := x - y | 18 | // Subtract, z := x - y |
| 17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 19 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
| 18 | // | 20 | // |
| 19 | // extern uint64_t bignum_sub | 21 | // extern uint64_t bignum_sub(uint64_t p, uint64_t *z, uint64_t m, |
| 20 | // (uint64_t p, uint64_t *z, | 22 | // const uint64_t *x, uint64_t n, const uint64_t *y); |
| 21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
| 22 | // | 23 | // |
| 23 | // Does the z := x - y operation, truncating modulo p words in general and | 24 | // Does the z := x - y operation, truncating modulo p words in general and |
| 24 | // returning a top borrow (0 or 1) in the p'th place, only subtracting input | 25 | // returning a top borrow (0 or 1) in the p'th place, only subtracting input |
| @@ -49,7 +50,7 @@ | |||
| 49 | 50 | ||
| 50 | 51 | ||
| 51 | S2N_BN_SYMBOL(bignum_sub): | 52 | S2N_BN_SYMBOL(bignum_sub): |
| 52 | _CET_ENDBR | 53 | _CET_ENDBR |
| 53 | 54 | ||
| 54 | #if WINDOWS_ABI | 55 | #if WINDOWS_ABI |
| 55 | push rdi | 56 | push rdi |
| @@ -75,7 +76,7 @@ S2N_BN_SYMBOL(bignum_sub): | |||
| 75 | cmp p, n | 76 | cmp p, n |
| 76 | cmovc n, p | 77 | cmovc n, p |
| 77 | cmp m, n | 78 | cmp m, n |
| 78 | jc ylonger | 79 | jc bignum_sub_ylonger |
| 79 | 80 | ||
| 80 | // The case where x is longer or of the same size (p >= m >= n) | 81 | // The case where x is longer or of the same size (p >= m >= n) |
| 81 | 82 | ||
| @@ -83,32 +84,32 @@ S2N_BN_SYMBOL(bignum_sub): | |||
| 83 | sub m, n | 84 | sub m, n |
| 84 | inc m | 85 | inc m |
| 85 | test n, n | 86 | test n, n |
| 86 | jz xtest | 87 | jz bignum_sub_xtest |
| 87 | xmainloop: | 88 | bignum_sub_xmainloop: |
| 88 | mov a, [x+8*i] | 89 | mov a, [x+8*i] |
| 89 | sbb a, [y+8*i] | 90 | sbb a, [y+8*i] |
| 90 | mov [z+8*i],a | 91 | mov [z+8*i],a |
| 91 | inc i | 92 | inc i |
| 92 | dec n | 93 | dec n |
| 93 | jnz xmainloop | 94 | jnz bignum_sub_xmainloop |
| 94 | jmp xtest | 95 | jmp bignum_sub_xtest |
| 95 | xtoploop: | 96 | bignum_sub_xtoploop: |
| 96 | mov a, [x+8*i] | 97 | mov a, [x+8*i] |
| 97 | sbb a, 0 | 98 | sbb a, 0 |
| 98 | mov [z+8*i],a | 99 | mov [z+8*i],a |
| 99 | inc i | 100 | inc i |
| 100 | xtest: | 101 | bignum_sub_xtest: |
| 101 | dec m | 102 | dec m |
| 102 | jnz xtoploop | 103 | jnz bignum_sub_xtoploop |
| 103 | sbb a, a | 104 | sbb a, a |
| 104 | test p, p | 105 | test p, p |
| 105 | jz tailskip | 106 | jz bignum_sub_tailskip |
| 106 | tailloop: | 107 | bignum_sub_tailloop: |
| 107 | mov [z+8*i],a | 108 | mov [z+8*i],a |
| 108 | inc i | 109 | inc i |
| 109 | dec p | 110 | dec p |
| 110 | jnz tailloop | 111 | jnz bignum_sub_tailloop |
| 111 | tailskip: | 112 | bignum_sub_tailskip: |
| 112 | neg a | 113 | neg a |
| 113 | #if WINDOWS_ABI | 114 | #if WINDOWS_ABI |
| 114 | pop rsi | 115 | pop rsi |
| @@ -118,29 +119,29 @@ tailskip: | |||
| 118 | 119 | ||
| 119 | // The case where y is longer (p >= n > m) | 120 | // The case where y is longer (p >= n > m) |
| 120 | 121 | ||
| 121 | ylonger: | 122 | bignum_sub_ylonger: |
| 122 | 123 | ||
| 123 | sub p, n | 124 | sub p, n |
| 124 | sub n, m | 125 | sub n, m |
| 125 | test m, m | 126 | test m, m |
| 126 | jz ytoploop | 127 | jz bignum_sub_ytoploop |
| 127 | ymainloop: | 128 | bignum_sub_ymainloop: |
| 128 | mov a, [x+8*i] | 129 | mov a, [x+8*i] |
| 129 | sbb a, [y+8*i] | 130 | sbb a, [y+8*i] |
| 130 | mov [z+8*i],a | 131 | mov [z+8*i],a |
| 131 | inc i | 132 | inc i |
| 132 | dec m | 133 | dec m |
| 133 | jnz ymainloop | 134 | jnz bignum_sub_ymainloop |
| 134 | ytoploop: | 135 | bignum_sub_ytoploop: |
| 135 | mov ashort, 0 | 136 | mov ashort, 0 |
| 136 | sbb a, [y+8*i] | 137 | sbb a, [y+8*i] |
| 137 | mov [z+8*i],a | 138 | mov [z+8*i],a |
| 138 | inc i | 139 | inc i |
| 139 | dec n | 140 | dec n |
| 140 | jnz ytoploop | 141 | jnz bignum_sub_ytoploop |
| 141 | sbb a, a | 142 | sbb a, a |
| 142 | test p, p | 143 | test p, p |
| 143 | jnz tailloop | 144 | jnz bignum_sub_tailloop |
| 144 | neg a | 145 | neg a |
| 145 | #if WINDOWS_ABI | 146 | #if WINDOWS_ABI |
| 146 | pop rsi | 147 | pop rsi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c b/src/lib/libcrypto/bn/arch/amd64/bn_arch.c index a377a05681..6c3888687b 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c +++ b/src/lib/libcrypto/bn/arch/amd64/bn_arch.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.c,v 1.7 2023/06/24 16:01:44 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.c,v 1.17 2025/09/01 15:33:23 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -19,6 +19,7 @@ | |||
| 19 | 19 | ||
| 20 | #include "bn_arch.h" | 20 | #include "bn_arch.h" |
| 21 | #include "bn_local.h" | 21 | #include "bn_local.h" |
| 22 | #include "crypto_arch.h" | ||
| 22 | #include "s2n_bignum.h" | 23 | #include "s2n_bignum.h" |
| 23 | 24 | ||
| 24 | #ifdef HAVE_BN_ADD | 25 | #ifdef HAVE_BN_ADD |
| @@ -26,8 +27,8 @@ BN_ULONG | |||
| 26 | bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | 27 | bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, |
| 27 | int b_len) | 28 | int b_len) |
| 28 | { | 29 | { |
| 29 | return bignum_add(r_len, (uint64_t *)r, a_len, (uint64_t *)a, | 30 | return bignum_add(r_len, (uint64_t *)r, a_len, (const uint64_t *)a, |
| 30 | b_len, (uint64_t *)b); | 31 | b_len, (const uint64_t *)b); |
| 31 | } | 32 | } |
| 32 | #endif | 33 | #endif |
| 33 | 34 | ||
| @@ -36,8 +37,8 @@ bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | |||
| 36 | BN_ULONG | 37 | BN_ULONG |
| 37 | bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) | 38 | bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) |
| 38 | { | 39 | { |
| 39 | return bignum_add(n, (uint64_t *)rd, n, (uint64_t *)ad, n, | 40 | return bignum_add(n, (uint64_t *)rd, n, (const uint64_t *)ad, n, |
| 40 | (uint64_t *)bd); | 41 | (const uint64_t *)bd); |
| 41 | } | 42 | } |
| 42 | #endif | 43 | #endif |
| 43 | 44 | ||
| @@ -46,8 +47,8 @@ BN_ULONG | |||
| 46 | bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | 47 | bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, |
| 47 | int b_len) | 48 | int b_len) |
| 48 | { | 49 | { |
| 49 | return bignum_sub(r_len, (uint64_t *)r, a_len, (uint64_t *)a, | 50 | return bignum_sub(r_len, (uint64_t *)r, a_len, (const uint64_t *)a, |
| 50 | b_len, (uint64_t *)b); | 51 | b_len, (const uint64_t *)b); |
| 51 | } | 52 | } |
| 52 | #endif | 53 | #endif |
| 53 | 54 | ||
| @@ -55,52 +56,99 @@ bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | |||
| 55 | BN_ULONG | 56 | BN_ULONG |
| 56 | bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) | 57 | bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) |
| 57 | { | 58 | { |
| 58 | return bignum_sub(n, (uint64_t *)rd, n, (uint64_t *)ad, n, | 59 | return bignum_sub(n, (uint64_t *)rd, n, (const uint64_t *)ad, n, |
| 59 | (uint64_t *)bd); | 60 | (const uint64_t *)bd); |
| 60 | } | 61 | } |
| 61 | #endif | 62 | #endif |
| 62 | 63 | ||
| 63 | #ifdef HAVE_BN_MUL_ADD_WORDS | 64 | #ifdef HAVE_BN_MOD_ADD_WORDS |
| 64 | BN_ULONG | 65 | void |
| 65 | bn_mul_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) | 66 | bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
| 67 | const BN_ULONG *m, size_t n) | ||
| 66 | { | 68 | { |
| 67 | return bignum_cmadd(num, (uint64_t *)rd, w, num, (uint64_t *)ad); | 69 | bignum_modadd(n, (uint64_t *)r, (const uint64_t *)a, |
| 70 | (const uint64_t *)b, (const uint64_t *)m); | ||
| 68 | } | 71 | } |
| 69 | #endif | 72 | #endif |
| 70 | 73 | ||
| 71 | #ifdef HAVE_BN_MUL_WORDS | 74 | #ifdef HAVE_BN_MOD_SUB_WORDS |
| 72 | BN_ULONG | 75 | void |
| 73 | bn_mul_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) | 76 | bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
| 77 | const BN_ULONG *m, size_t n) | ||
| 74 | { | 78 | { |
| 75 | return bignum_cmul(num, (uint64_t *)rd, w, num, (uint64_t *)ad); | 79 | bignum_modsub(n, (uint64_t *)r, (const uint64_t *)a, |
| 80 | (const uint64_t *)b, (const uint64_t *)m); | ||
| 76 | } | 81 | } |
| 77 | #endif | 82 | #endif |
| 78 | 83 | ||
| 79 | #ifdef HAVE_BN_MUL_COMBA4 | 84 | #ifdef HAVE_BN_MUL_COMBA4 |
| 80 | void | 85 | void |
| 81 | bn_mul_comba4(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd) | 86 | bn_mul_comba4(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd) |
| 87 | { | ||
| 88 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { | ||
| 89 | bignum_mul_4_8((uint64_t *)rd, (const uint64_t *)ad, | ||
| 90 | (const uint64_t *)bd); | ||
| 91 | return; | ||
| 92 | } | ||
| 93 | |||
| 94 | bignum_mul_4_8_alt((uint64_t *)rd, (const uint64_t *)ad, | ||
| 95 | (const uint64_t *)bd); | ||
| 96 | } | ||
| 97 | #endif | ||
| 98 | |||
| 99 | #ifdef HAVE_BN_MUL_COMBA6 | ||
| 100 | void | ||
| 101 | bn_mul_comba6(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd) | ||
| 82 | { | 102 | { |
| 83 | /* XXX - consider using non-alt on CPUs that have the ADX extension. */ | 103 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { |
| 84 | bignum_mul_4_8_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd); | 104 | bignum_mul_6_12((uint64_t *)rd, (const uint64_t *)ad, |
| 105 | (const uint64_t *)bd); | ||
| 106 | return; | ||
| 107 | } | ||
| 108 | |||
| 109 | bignum_mul_6_12_alt((uint64_t *)rd, (const uint64_t *)ad, | ||
| 110 | (const uint64_t *)bd); | ||
| 85 | } | 111 | } |
| 86 | #endif | 112 | #endif |
| 87 | 113 | ||
| 88 | #ifdef HAVE_BN_MUL_COMBA8 | 114 | #ifdef HAVE_BN_MUL_COMBA8 |
| 89 | void | 115 | void |
| 90 | bn_mul_comba8(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd) | 116 | bn_mul_comba8(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd) |
| 91 | { | 117 | { |
| 92 | /* XXX - consider using non-alt on CPUs that have the ADX extension. */ | 118 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { |
| 93 | bignum_mul_8_16_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd); | 119 | bignum_mul_8_16((uint64_t *)rd, (const uint64_t *)ad, |
| 120 | (const uint64_t *)bd); | ||
| 121 | return; | ||
| 122 | } | ||
| 123 | |||
| 124 | bignum_mul_8_16_alt((uint64_t *)rd, (const uint64_t *)ad, | ||
| 125 | (const uint64_t *)bd); | ||
| 94 | } | 126 | } |
| 95 | #endif | 127 | #endif |
| 96 | 128 | ||
| 97 | #ifdef HAVE_BN_SQR | 129 | #ifdef HAVE_BN_MUL_WORDS |
| 98 | int | 130 | void |
| 99 | bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) | 131 | bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int a_len, const BN_ULONG *b, |
| 132 | int b_len) | ||
| 133 | { | ||
| 134 | bignum_mul(a_len + b_len, (uint64_t *)r, a_len, (const uint64_t *)a, | ||
| 135 | b_len, (const uint64_t *)b); | ||
| 136 | } | ||
| 137 | #endif | ||
| 138 | |||
| 139 | #ifdef HAVE_BN_MULW_ADD_WORDS | ||
| 140 | BN_ULONG | ||
| 141 | bn_mulw_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) | ||
| 100 | { | 142 | { |
| 101 | bignum_sqr(r_len, (uint64_t *)r->d, a->top, (uint64_t *)a->d); | 143 | return bignum_cmadd(num, (uint64_t *)rd, w, num, (const uint64_t *)ad); |
| 144 | } | ||
| 145 | #endif | ||
| 102 | 146 | ||
| 103 | return 1; | 147 | #ifdef HAVE_BN_MULW_WORDS |
| 148 | BN_ULONG | ||
| 149 | bn_mulw_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w) | ||
| 150 | { | ||
| 151 | return bignum_cmul(num, (uint64_t *)rd, w, num, (const uint64_t *)ad); | ||
| 104 | } | 152 | } |
| 105 | #endif | 153 | #endif |
| 106 | 154 | ||
| @@ -108,8 +156,25 @@ bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) | |||
| 108 | void | 156 | void |
| 109 | bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad) | 157 | bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad) |
| 110 | { | 158 | { |
| 111 | /* XXX - consider using non-alt on CPUs that have the ADX extension. */ | 159 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { |
| 112 | bignum_sqr_4_8_alt((uint64_t *)rd, (uint64_t *)ad); | 160 | bignum_sqr_4_8((uint64_t *)rd, (const uint64_t *)ad); |
| 161 | return; | ||
| 162 | } | ||
| 163 | |||
| 164 | bignum_sqr_4_8_alt((uint64_t *)rd, (const uint64_t *)ad); | ||
| 165 | } | ||
| 166 | #endif | ||
| 167 | |||
| 168 | #ifdef HAVE_BN_SQR_COMBA6 | ||
| 169 | void | ||
| 170 | bn_sqr_comba6(BN_ULONG *rd, const BN_ULONG *ad) | ||
| 171 | { | ||
| 172 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { | ||
| 173 | bignum_sqr_6_12((uint64_t *)rd, (const uint64_t *)ad); | ||
| 174 | return; | ||
| 175 | } | ||
| 176 | |||
| 177 | bignum_sqr_6_12_alt((uint64_t *)rd, (const uint64_t *)ad); | ||
| 113 | } | 178 | } |
| 114 | #endif | 179 | #endif |
| 115 | 180 | ||
| @@ -117,8 +182,20 @@ bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad) | |||
| 117 | void | 182 | void |
| 118 | bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad) | 183 | bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad) |
| 119 | { | 184 | { |
| 120 | /* XXX - consider using non-alt on CPUs that have the ADX extension. */ | 185 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_ADX) != 0) { |
| 121 | bignum_sqr_8_16_alt((uint64_t *)rd, (uint64_t *)ad); | 186 | bignum_sqr_8_16((uint64_t *)rd, (const uint64_t *)ad); |
| 187 | return; | ||
| 188 | } | ||
| 189 | |||
| 190 | bignum_sqr_8_16_alt((uint64_t *)rd, (const uint64_t *)ad); | ||
| 191 | } | ||
| 192 | #endif | ||
| 193 | |||
| 194 | #ifdef HAVE_BN_SQR_WORDS | ||
| 195 | void | ||
| 196 | bn_sqr_words(BN_ULONG *rd, const BN_ULONG *ad, int a_len) | ||
| 197 | { | ||
| 198 | bignum_sqr(a_len * 2, (uint64_t *)rd, a_len, (const uint64_t *)ad); | ||
| 122 | } | 199 | } |
| 123 | #endif | 200 | #endif |
| 124 | 201 | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h index 927cd75208..3cb1d1d274 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.14 2024/03/26 06:09:25 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.19 2025/09/01 15:15:44 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -27,14 +27,20 @@ | |||
| 27 | 27 | ||
| 28 | #define HAVE_BN_DIV_WORDS | 28 | #define HAVE_BN_DIV_WORDS |
| 29 | 29 | ||
| 30 | #define HAVE_BN_MUL_ADD_WORDS | 30 | #define HAVE_BN_MOD_ADD_WORDS |
| 31 | #define HAVE_BN_MOD_SUB_WORDS | ||
| 32 | |||
| 31 | #define HAVE_BN_MUL_COMBA4 | 33 | #define HAVE_BN_MUL_COMBA4 |
| 34 | #define HAVE_BN_MUL_COMBA6 | ||
| 32 | #define HAVE_BN_MUL_COMBA8 | 35 | #define HAVE_BN_MUL_COMBA8 |
| 33 | #define HAVE_BN_MUL_WORDS | 36 | #define HAVE_BN_MUL_WORDS |
| 37 | #define HAVE_BN_MULW_ADD_WORDS | ||
| 38 | #define HAVE_BN_MULW_WORDS | ||
| 34 | 39 | ||
| 35 | #define HAVE_BN_SQR | ||
| 36 | #define HAVE_BN_SQR_COMBA4 | 40 | #define HAVE_BN_SQR_COMBA4 |
| 41 | #define HAVE_BN_SQR_COMBA6 | ||
| 37 | #define HAVE_BN_SQR_COMBA8 | 42 | #define HAVE_BN_SQR_COMBA8 |
| 43 | #define HAVE_BN_SQR_WORDS | ||
| 38 | 44 | ||
| 39 | #define HAVE_BN_SUB | 45 | #define HAVE_BN_SUB |
| 40 | #define HAVE_BN_SUB_WORDS | 46 | #define HAVE_BN_SUB_WORDS |
diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S index 3926fcd4b0..705fbdbbda 100644 --- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S +++ b/src/lib/libcrypto/bn/arch/amd64/word_clz.S | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: word_clz.S,v 1.7 2025/08/11 14:13:56 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -16,7 +18,7 @@ | |||
| 16 | // Count leading zero bits in a single word | 18 | // Count leading zero bits in a single word |
| 17 | // Input a; output function return | 19 | // Input a; output function return |
| 18 | // | 20 | // |
| 19 | // extern uint64_t word_clz (uint64_t a); | 21 | // extern uint64_t word_clz(uint64_t a); |
| 20 | // | 22 | // |
| 21 | // Standard x86-64 ABI: RDI = a, returns RAX | 23 | // Standard x86-64 ABI: RDI = a, returns RAX |
| 22 | // Microsoft x64 ABI: RCX = a, returns RAX | 24 | // Microsoft x64 ABI: RCX = a, returns RAX |
| @@ -30,7 +32,7 @@ | |||
| 30 | .text | 32 | .text |
| 31 | 33 | ||
| 32 | S2N_BN_SYMBOL(word_clz): | 34 | S2N_BN_SYMBOL(word_clz): |
| 33 | _CET_ENDBR | 35 | _CET_ENDBR |
| 34 | 36 | ||
| 35 | #if WINDOWS_ABI | 37 | #if WINDOWS_ABI |
| 36 | push rdi | 38 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/i386/bn_arch.h b/src/lib/libcrypto/bn/arch/i386/bn_arch.h index eef519fcc7..288cbdeaa9 100644 --- a/src/lib/libcrypto/bn/arch/i386/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/i386/bn_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.9 2023/02/16 10:41:03 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.11 2025/09/07 03:56:37 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -26,14 +26,13 @@ | |||
| 26 | 26 | ||
| 27 | #define HAVE_BN_DIV_WORDS | 27 | #define HAVE_BN_DIV_WORDS |
| 28 | 28 | ||
| 29 | #define HAVE_BN_MUL_ADD_WORDS | ||
| 30 | #define HAVE_BN_MUL_COMBA4 | 29 | #define HAVE_BN_MUL_COMBA4 |
| 31 | #define HAVE_BN_MUL_COMBA8 | 30 | #define HAVE_BN_MUL_COMBA8 |
| 32 | #define HAVE_BN_MUL_WORDS | 31 | #define HAVE_BN_MULW_ADD_WORDS |
| 32 | #define HAVE_BN_MULW_WORDS | ||
| 33 | 33 | ||
| 34 | #define HAVE_BN_SQR_COMBA4 | 34 | #define HAVE_BN_SQR_COMBA4 |
| 35 | #define HAVE_BN_SQR_COMBA8 | 35 | #define HAVE_BN_SQR_COMBA8 |
| 36 | #define HAVE_BN_SQR_WORDS | ||
| 37 | 36 | ||
| 38 | #define HAVE_BN_SUB_WORDS | 37 | #define HAVE_BN_SUB_WORDS |
| 39 | 38 | ||
diff --git a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h index 53771bce1e..562a398f33 100644 --- a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.7 2023/01/23 12:17:58 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.9 2025/09/07 03:56:37 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -25,14 +25,13 @@ | |||
| 25 | #define HAVE_BN_DIV_WORDS | 25 | #define HAVE_BN_DIV_WORDS |
| 26 | #define HAVE_BN_DIV_3_WORDS | 26 | #define HAVE_BN_DIV_3_WORDS |
| 27 | 27 | ||
| 28 | #define HAVE_BN_MUL_ADD_WORDS | ||
| 29 | #define HAVE_BN_MUL_COMBA4 | 28 | #define HAVE_BN_MUL_COMBA4 |
| 30 | #define HAVE_BN_MUL_COMBA8 | 29 | #define HAVE_BN_MUL_COMBA8 |
| 31 | #define HAVE_BN_MUL_WORDS | 30 | #define HAVE_BN_MULW_ADD_WORDS |
| 31 | #define HAVE_BN_MULW_WORDS | ||
| 32 | 32 | ||
| 33 | #define HAVE_BN_SQR_COMBA4 | 33 | #define HAVE_BN_SQR_COMBA4 |
| 34 | #define HAVE_BN_SQR_COMBA8 | 34 | #define HAVE_BN_SQR_COMBA8 |
| 35 | #define HAVE_BN_SQR_WORDS | ||
| 36 | 35 | ||
| 37 | #define HAVE_BN_SUB_WORDS | 36 | #define HAVE_BN_SUB_WORDS |
| 38 | 37 | ||
diff --git a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h index 46e932a2d5..21bcdf48d3 100644 --- a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.6 2023/01/23 12:17:58 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.8 2025/09/07 03:56:37 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -24,14 +24,13 @@ | |||
| 24 | 24 | ||
| 25 | #define HAVE_BN_DIV_WORDS | 25 | #define HAVE_BN_DIV_WORDS |
| 26 | 26 | ||
| 27 | #define HAVE_BN_MUL_ADD_WORDS | ||
| 28 | #define HAVE_BN_MUL_COMBA4 | 27 | #define HAVE_BN_MUL_COMBA4 |
| 29 | #define HAVE_BN_MUL_COMBA8 | 28 | #define HAVE_BN_MUL_COMBA8 |
| 30 | #define HAVE_BN_MUL_WORDS | 29 | #define HAVE_BN_MULW_ADD_WORDS |
| 30 | #define HAVE_BN_MULW_WORDS | ||
| 31 | 31 | ||
| 32 | #define HAVE_BN_SQR_COMBA4 | 32 | #define HAVE_BN_SQR_COMBA4 |
| 33 | #define HAVE_BN_SQR_COMBA8 | 33 | #define HAVE_BN_SQR_COMBA8 |
| 34 | #define HAVE_BN_SQR_WORDS | ||
| 35 | 34 | ||
| 36 | #define HAVE_BN_SUB_WORDS | 35 | #define HAVE_BN_SUB_WORDS |
| 37 | 36 | ||
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl index 71b775af8d..9b4b11ad5b 100644 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ b/src/lib/libcrypto/bn/asm/bn-586.pl | |||
| @@ -6,21 +6,20 @@ require "x86asm.pl"; | |||
| 6 | 6 | ||
| 7 | &asm_init($ARGV[0],$0); | 7 | &asm_init($ARGV[0],$0); |
| 8 | 8 | ||
| 9 | $sse2=0; | 9 | $sse2=1; |
| 10 | for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | ||
| 11 | 10 | ||
| 12 | &external_label("OPENSSL_ia32cap_P") if ($sse2); | 11 | &external_label("OPENSSL_ia32cap_P") if ($sse2); |
| 13 | 12 | ||
| 14 | &bn_mul_add_words("bn_mul_add_words"); | 13 | &bn_mulw_add_words("bn_mulw_add_words"); |
| 15 | &bn_mul_words("bn_mul_words"); | 14 | &bn_mulw_words("bn_mulw_words"); |
| 16 | &bn_sqr_words("bn_sqr_words"); | 15 | &bn_sqr_word_wise("bn_sqr_word_wise"); |
| 17 | &bn_div_words("bn_div_words"); | 16 | &bn_div_words("bn_div_words"); |
| 18 | &bn_add_words("bn_add_words"); | 17 | &bn_add_words("bn_add_words"); |
| 19 | &bn_sub_words("bn_sub_words"); | 18 | &bn_sub_words("bn_sub_words"); |
| 20 | 19 | ||
| 21 | &asm_finish(); | 20 | &asm_finish(); |
| 22 | 21 | ||
| 23 | sub bn_mul_add_words | 22 | sub bn_mulw_add_words |
| 24 | { | 23 | { |
| 25 | local($name)=@_; | 24 | local($name)=@_; |
| 26 | 25 | ||
| @@ -207,7 +206,7 @@ sub bn_mul_add_words | |||
| 207 | &function_end($name); | 206 | &function_end($name); |
| 208 | } | 207 | } |
| 209 | 208 | ||
| 210 | sub bn_mul_words | 209 | sub bn_mulw_words |
| 211 | { | 210 | { |
| 212 | local($name)=@_; | 211 | local($name)=@_; |
| 213 | 212 | ||
| @@ -319,7 +318,7 @@ sub bn_mul_words | |||
| 319 | &function_end($name); | 318 | &function_end($name); |
| 320 | } | 319 | } |
| 321 | 320 | ||
| 322 | sub bn_sqr_words | 321 | sub bn_sqr_word_wise |
| 323 | { | 322 | { |
| 324 | local($name)=@_; | 323 | local($name)=@_; |
| 325 | 324 | ||
diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl index 02d43e15b0..aaa0c5d8b0 100644 --- a/src/lib/libcrypto/bn/asm/mips.pl +++ b/src/lib/libcrypto/bn/asm/mips.pl | |||
| @@ -110,19 +110,19 @@ $code.=<<___; | |||
| 110 | .set noat | 110 | .set noat |
| 111 | 111 | ||
| 112 | .align 5 | 112 | .align 5 |
| 113 | .globl bn_mul_add_words | 113 | .globl bn_mulw_add_words |
| 114 | .ent bn_mul_add_words | 114 | .ent bn_mulw_add_words |
| 115 | bn_mul_add_words: | 115 | bn_mulw_add_words: |
| 116 | .set noreorder | 116 | .set noreorder |
| 117 | bgtz $a2,bn_mul_add_words_internal | 117 | bgtz $a2,bn_mulw_add_words_internal |
| 118 | move $v0,$zero | 118 | move $v0,$zero |
| 119 | jr $ra | 119 | jr $ra |
| 120 | move $a0,$v0 | 120 | move $a0,$v0 |
| 121 | .end bn_mul_add_words | 121 | .end bn_mulw_add_words |
| 122 | 122 | ||
| 123 | .align 5 | 123 | .align 5 |
| 124 | .ent bn_mul_add_words_internal | 124 | .ent bn_mulw_add_words_internal |
| 125 | bn_mul_add_words_internal: | 125 | bn_mulw_add_words_internal: |
| 126 | ___ | 126 | ___ |
| 127 | $code.=<<___ if ($flavour =~ /nubi/i); | 127 | $code.=<<___ if ($flavour =~ /nubi/i); |
| 128 | .frame $sp,6*$SZREG,$ra | 128 | .frame $sp,6*$SZREG,$ra |
| @@ -140,9 +140,9 @@ $code.=<<___; | |||
| 140 | .set reorder | 140 | .set reorder |
| 141 | li $minus4,-4 | 141 | li $minus4,-4 |
| 142 | and $ta0,$a2,$minus4 | 142 | and $ta0,$a2,$minus4 |
| 143 | beqz $ta0,.L_bn_mul_add_words_tail | 143 | beqz $ta0,.L_bn_mulw_add_words_tail |
| 144 | 144 | ||
| 145 | .L_bn_mul_add_words_loop: | 145 | .L_bn_mulw_add_words_loop: |
| 146 | $LD $t0,0($a1) | 146 | $LD $t0,0($a1) |
| 147 | $MULTU $t0,$a3 | 147 | $MULTU $t0,$a3 |
| 148 | $LD $t1,0($a0) | 148 | $LD $t1,0($a0) |
| @@ -201,13 +201,13 @@ $code.=<<___; | |||
| 201 | sltu $at,$ta3,$at | 201 | sltu $at,$ta3,$at |
| 202 | $ST $ta3,-$BNSZ($a0) | 202 | $ST $ta3,-$BNSZ($a0) |
| 203 | .set noreorder | 203 | .set noreorder |
| 204 | bgtz $ta0,.L_bn_mul_add_words_loop | 204 | bgtz $ta0,.L_bn_mulw_add_words_loop |
| 205 | $ADDU $v0,$at | 205 | $ADDU $v0,$at |
| 206 | 206 | ||
| 207 | beqz $a2,.L_bn_mul_add_words_return | 207 | beqz $a2,.L_bn_mulw_add_words_return |
| 208 | nop | 208 | nop |
| 209 | 209 | ||
| 210 | .L_bn_mul_add_words_tail: | 210 | .L_bn_mulw_add_words_tail: |
| 211 | .set reorder | 211 | .set reorder |
| 212 | $LD $t0,0($a1) | 212 | $LD $t0,0($a1) |
| 213 | $MULTU $t0,$a3 | 213 | $MULTU $t0,$a3 |
| @@ -222,7 +222,7 @@ $code.=<<___; | |||
| 222 | sltu $at,$t1,$at | 222 | sltu $at,$t1,$at |
| 223 | $ST $t1,0($a0) | 223 | $ST $t1,0($a0) |
| 224 | $ADDU $v0,$at | 224 | $ADDU $v0,$at |
| 225 | beqz $a2,.L_bn_mul_add_words_return | 225 | beqz $a2,.L_bn_mulw_add_words_return |
| 226 | 226 | ||
| 227 | $LD $t0,$BNSZ($a1) | 227 | $LD $t0,$BNSZ($a1) |
| 228 | $MULTU $t0,$a3 | 228 | $MULTU $t0,$a3 |
| @@ -237,7 +237,7 @@ $code.=<<___; | |||
| 237 | sltu $at,$t1,$at | 237 | sltu $at,$t1,$at |
| 238 | $ST $t1,$BNSZ($a0) | 238 | $ST $t1,$BNSZ($a0) |
| 239 | $ADDU $v0,$at | 239 | $ADDU $v0,$at |
| 240 | beqz $a2,.L_bn_mul_add_words_return | 240 | beqz $a2,.L_bn_mulw_add_words_return |
| 241 | 241 | ||
| 242 | $LD $t0,2*$BNSZ($a1) | 242 | $LD $t0,2*$BNSZ($a1) |
| 243 | $MULTU $t0,$a3 | 243 | $MULTU $t0,$a3 |
| @@ -252,7 +252,7 @@ $code.=<<___; | |||
| 252 | $ST $t1,2*$BNSZ($a0) | 252 | $ST $t1,2*$BNSZ($a0) |
| 253 | $ADDU $v0,$at | 253 | $ADDU $v0,$at |
| 254 | 254 | ||
| 255 | .L_bn_mul_add_words_return: | 255 | .L_bn_mulw_add_words_return: |
| 256 | .set noreorder | 256 | .set noreorder |
| 257 | ___ | 257 | ___ |
| 258 | $code.=<<___ if ($flavour =~ /nubi/i); | 258 | $code.=<<___ if ($flavour =~ /nubi/i); |
| @@ -266,22 +266,22 @@ ___ | |||
| 266 | $code.=<<___; | 266 | $code.=<<___; |
| 267 | jr $ra | 267 | jr $ra |
| 268 | move $a0,$v0 | 268 | move $a0,$v0 |
| 269 | .end bn_mul_add_words_internal | 269 | .end bn_mulw_add_words_internal |
| 270 | 270 | ||
| 271 | .align 5 | 271 | .align 5 |
| 272 | .globl bn_mul_words | 272 | .globl bn_mulw_words |
| 273 | .ent bn_mul_words | 273 | .ent bn_mulw_words |
| 274 | bn_mul_words: | 274 | bn_mulw_words: |
| 275 | .set noreorder | 275 | .set noreorder |
| 276 | bgtz $a2,bn_mul_words_internal | 276 | bgtz $a2,bn_mulw_words_internal |
| 277 | move $v0,$zero | 277 | move $v0,$zero |
| 278 | jr $ra | 278 | jr $ra |
| 279 | move $a0,$v0 | 279 | move $a0,$v0 |
| 280 | .end bn_mul_words | 280 | .end bn_mulw_words |
| 281 | 281 | ||
| 282 | .align 5 | 282 | .align 5 |
| 283 | .ent bn_mul_words_internal | 283 | .ent bn_mulw_words_internal |
| 284 | bn_mul_words_internal: | 284 | bn_mulw_words_internal: |
| 285 | ___ | 285 | ___ |
| 286 | $code.=<<___ if ($flavour =~ /nubi/i); | 286 | $code.=<<___ if ($flavour =~ /nubi/i); |
| 287 | .frame $sp,6*$SZREG,$ra | 287 | .frame $sp,6*$SZREG,$ra |
| @@ -299,9 +299,9 @@ $code.=<<___; | |||
| 299 | .set reorder | 299 | .set reorder |
| 300 | li $minus4,-4 | 300 | li $minus4,-4 |
| 301 | and $ta0,$a2,$minus4 | 301 | and $ta0,$a2,$minus4 |
| 302 | beqz $ta0,.L_bn_mul_words_tail | 302 | beqz $ta0,.L_bn_mulw_words_tail |
| 303 | 303 | ||
| 304 | .L_bn_mul_words_loop: | 304 | .L_bn_mulw_words_loop: |
| 305 | $LD $t0,0($a1) | 305 | $LD $t0,0($a1) |
| 306 | $MULTU $t0,$a3 | 306 | $MULTU $t0,$a3 |
| 307 | $LD $t2,$BNSZ($a1) | 307 | $LD $t2,$BNSZ($a1) |
| @@ -341,13 +341,13 @@ $code.=<<___; | |||
| 341 | sltu $ta3,$v0,$at | 341 | sltu $ta3,$v0,$at |
| 342 | $ST $v0,-$BNSZ($a0) | 342 | $ST $v0,-$BNSZ($a0) |
| 343 | .set noreorder | 343 | .set noreorder |
| 344 | bgtz $ta0,.L_bn_mul_words_loop | 344 | bgtz $ta0,.L_bn_mulw_words_loop |
| 345 | $ADDU $v0,$ta3,$ta2 | 345 | $ADDU $v0,$ta3,$ta2 |
| 346 | 346 | ||
| 347 | beqz $a2,.L_bn_mul_words_return | 347 | beqz $a2,.L_bn_mulw_words_return |
| 348 | nop | 348 | nop |
| 349 | 349 | ||
| 350 | .L_bn_mul_words_tail: | 350 | .L_bn_mulw_words_tail: |
| 351 | .set reorder | 351 | .set reorder |
| 352 | $LD $t0,0($a1) | 352 | $LD $t0,0($a1) |
| 353 | $MULTU $t0,$a3 | 353 | $MULTU $t0,$a3 |
| @@ -358,7 +358,7 @@ $code.=<<___; | |||
| 358 | sltu $t1,$v0,$at | 358 | sltu $t1,$v0,$at |
| 359 | $ST $v0,0($a0) | 359 | $ST $v0,0($a0) |
| 360 | $ADDU $v0,$t1,$t0 | 360 | $ADDU $v0,$t1,$t0 |
| 361 | beqz $a2,.L_bn_mul_words_return | 361 | beqz $a2,.L_bn_mulw_words_return |
| 362 | 362 | ||
| 363 | $LD $t0,$BNSZ($a1) | 363 | $LD $t0,$BNSZ($a1) |
| 364 | $MULTU $t0,$a3 | 364 | $MULTU $t0,$a3 |
| @@ -369,7 +369,7 @@ $code.=<<___; | |||
| 369 | sltu $t1,$v0,$at | 369 | sltu $t1,$v0,$at |
| 370 | $ST $v0,$BNSZ($a0) | 370 | $ST $v0,$BNSZ($a0) |
| 371 | $ADDU $v0,$t1,$t0 | 371 | $ADDU $v0,$t1,$t0 |
| 372 | beqz $a2,.L_bn_mul_words_return | 372 | beqz $a2,.L_bn_mulw_words_return |
| 373 | 373 | ||
| 374 | $LD $t0,2*$BNSZ($a1) | 374 | $LD $t0,2*$BNSZ($a1) |
| 375 | $MULTU $t0,$a3 | 375 | $MULTU $t0,$a3 |
| @@ -380,7 +380,7 @@ $code.=<<___; | |||
| 380 | $ST $v0,2*$BNSZ($a0) | 380 | $ST $v0,2*$BNSZ($a0) |
| 381 | $ADDU $v0,$t1,$t0 | 381 | $ADDU $v0,$t1,$t0 |
| 382 | 382 | ||
| 383 | .L_bn_mul_words_return: | 383 | .L_bn_mulw_words_return: |
| 384 | .set noreorder | 384 | .set noreorder |
| 385 | ___ | 385 | ___ |
| 386 | $code.=<<___ if ($flavour =~ /nubi/i); | 386 | $code.=<<___ if ($flavour =~ /nubi/i); |
| @@ -394,22 +394,22 @@ ___ | |||
| 394 | $code.=<<___; | 394 | $code.=<<___; |
| 395 | jr $ra | 395 | jr $ra |
| 396 | move $a0,$v0 | 396 | move $a0,$v0 |
| 397 | .end bn_mul_words_internal | 397 | .end bn_mulw_words_internal |
| 398 | 398 | ||
| 399 | .align 5 | 399 | .align 5 |
| 400 | .globl bn_sqr_words | 400 | .globl bn_sqr_word_wise |
| 401 | .ent bn_sqr_words | 401 | .ent bn_sqr_word_wise |
| 402 | bn_sqr_words: | 402 | bn_sqr_word_wise: |
| 403 | .set noreorder | 403 | .set noreorder |
| 404 | bgtz $a2,bn_sqr_words_internal | 404 | bgtz $a2,bn_sqr_word_wise_internal |
| 405 | move $v0,$zero | 405 | move $v0,$zero |
| 406 | jr $ra | 406 | jr $ra |
| 407 | move $a0,$v0 | 407 | move $a0,$v0 |
| 408 | .end bn_sqr_words | 408 | .end bn_sqr_word_wise |
| 409 | 409 | ||
| 410 | .align 5 | 410 | .align 5 |
| 411 | .ent bn_sqr_words_internal | 411 | .ent bn_sqr_word_wise_internal |
| 412 | bn_sqr_words_internal: | 412 | bn_sqr_word_wise_internal: |
| 413 | ___ | 413 | ___ |
| 414 | $code.=<<___ if ($flavour =~ /nubi/i); | 414 | $code.=<<___ if ($flavour =~ /nubi/i); |
| 415 | .frame $sp,6*$SZREG,$ra | 415 | .frame $sp,6*$SZREG,$ra |
| @@ -427,9 +427,9 @@ $code.=<<___; | |||
| 427 | .set reorder | 427 | .set reorder |
| 428 | li $minus4,-4 | 428 | li $minus4,-4 |
| 429 | and $ta0,$a2,$minus4 | 429 | and $ta0,$a2,$minus4 |
| 430 | beqz $ta0,.L_bn_sqr_words_tail | 430 | beqz $ta0,.L_bn_sqr_word_wise_tail |
| 431 | 431 | ||
| 432 | .L_bn_sqr_words_loop: | 432 | .L_bn_sqr_word_wise_loop: |
| 433 | $LD $t0,0($a1) | 433 | $LD $t0,0($a1) |
| 434 | $MULTU $t0,$t0 | 434 | $MULTU $t0,$t0 |
| 435 | $LD $t2,$BNSZ($a1) | 435 | $LD $t2,$BNSZ($a1) |
| @@ -463,13 +463,13 @@ $code.=<<___; | |||
| 463 | $ST $ta3,-2*$BNSZ($a0) | 463 | $ST $ta3,-2*$BNSZ($a0) |
| 464 | 464 | ||
| 465 | .set noreorder | 465 | .set noreorder |
| 466 | bgtz $ta0,.L_bn_sqr_words_loop | 466 | bgtz $ta0,.L_bn_sqr_word_wise_loop |
| 467 | $ST $ta2,-$BNSZ($a0) | 467 | $ST $ta2,-$BNSZ($a0) |
| 468 | 468 | ||
| 469 | beqz $a2,.L_bn_sqr_words_return | 469 | beqz $a2,.L_bn_sqr_word_wise_return |
| 470 | nop | 470 | nop |
| 471 | 471 | ||
| 472 | .L_bn_sqr_words_tail: | 472 | .L_bn_sqr_word_wise_tail: |
| 473 | .set reorder | 473 | .set reorder |
| 474 | $LD $t0,0($a1) | 474 | $LD $t0,0($a1) |
| 475 | $MULTU $t0,$t0 | 475 | $MULTU $t0,$t0 |
| @@ -478,7 +478,7 @@ $code.=<<___; | |||
| 478 | mfhi $t0 | 478 | mfhi $t0 |
| 479 | $ST $t1,0($a0) | 479 | $ST $t1,0($a0) |
| 480 | $ST $t0,$BNSZ($a0) | 480 | $ST $t0,$BNSZ($a0) |
| 481 | beqz $a2,.L_bn_sqr_words_return | 481 | beqz $a2,.L_bn_sqr_word_wise_return |
| 482 | 482 | ||
| 483 | $LD $t0,$BNSZ($a1) | 483 | $LD $t0,$BNSZ($a1) |
| 484 | $MULTU $t0,$t0 | 484 | $MULTU $t0,$t0 |
| @@ -487,7 +487,7 @@ $code.=<<___; | |||
| 487 | mfhi $t0 | 487 | mfhi $t0 |
| 488 | $ST $t1,2*$BNSZ($a0) | 488 | $ST $t1,2*$BNSZ($a0) |
| 489 | $ST $t0,3*$BNSZ($a0) | 489 | $ST $t0,3*$BNSZ($a0) |
| 490 | beqz $a2,.L_bn_sqr_words_return | 490 | beqz $a2,.L_bn_sqr_word_wise_return |
| 491 | 491 | ||
| 492 | $LD $t0,2*$BNSZ($a1) | 492 | $LD $t0,2*$BNSZ($a1) |
| 493 | $MULTU $t0,$t0 | 493 | $MULTU $t0,$t0 |
| @@ -496,7 +496,7 @@ $code.=<<___; | |||
| 496 | $ST $t1,4*$BNSZ($a0) | 496 | $ST $t1,4*$BNSZ($a0) |
| 497 | $ST $t0,5*$BNSZ($a0) | 497 | $ST $t0,5*$BNSZ($a0) |
| 498 | 498 | ||
| 499 | .L_bn_sqr_words_return: | 499 | .L_bn_sqr_word_wise_return: |
| 500 | .set noreorder | 500 | .set noreorder |
| 501 | ___ | 501 | ___ |
| 502 | $code.=<<___ if ($flavour =~ /nubi/i); | 502 | $code.=<<___ if ($flavour =~ /nubi/i); |
| @@ -511,7 +511,7 @@ $code.=<<___; | |||
| 511 | jr $ra | 511 | jr $ra |
| 512 | move $a0,$v0 | 512 | move $a0,$v0 |
| 513 | 513 | ||
| 514 | .end bn_sqr_words_internal | 514 | .end bn_sqr_word_wise_internal |
| 515 | 515 | ||
| 516 | .align 5 | 516 | .align 5 |
| 517 | .globl bn_add_words | 517 | .globl bn_add_words |
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl index c9b7f9477d..9b8dc55bff 100644 --- a/src/lib/libcrypto/bn/asm/ppc.pl +++ b/src/lib/libcrypto/bn/asm/ppc.pl | |||
| @@ -204,9 +204,9 @@ $data=<<EOF; | |||
| 204 | # bn_sub_words | 204 | # bn_sub_words |
| 205 | # bn_add_words | 205 | # bn_add_words |
| 206 | # bn_div_words | 206 | # bn_div_words |
| 207 | # bn_sqr_words | 207 | # bn_sqr_word_wise |
| 208 | # bn_mul_words | 208 | # bn_mulw_words |
| 209 | # bn_mul_add_words | 209 | # bn_mulw_add_words |
| 210 | # | 210 | # |
| 211 | # NOTE: It is possible to optimize this code more for | 211 | # NOTE: It is possible to optimize this code more for |
| 212 | # specific PowerPC or Power architectures. On the Northstar | 212 | # specific PowerPC or Power architectures. On the Northstar |
| @@ -248,9 +248,9 @@ $data=<<EOF; | |||
| 248 | .globl .bn_sub_words | 248 | .globl .bn_sub_words |
| 249 | .globl .bn_add_words | 249 | .globl .bn_add_words |
| 250 | .globl .bn_div_words | 250 | .globl .bn_div_words |
| 251 | .globl .bn_sqr_words | 251 | .globl .bn_sqr_word_wise |
| 252 | .globl .bn_mul_words | 252 | .globl .bn_mulw_words |
| 253 | .globl .bn_mul_add_words | 253 | .globl .bn_mulw_add_words |
| 254 | 254 | ||
| 255 | # .text section | 255 | # .text section |
| 256 | 256 | ||
| @@ -1702,16 +1702,16 @@ Lppcasm_div9: | |||
| 1702 | 1702 | ||
| 1703 | # | 1703 | # |
| 1704 | # NOTE: The following label name should be changed to | 1704 | # NOTE: The following label name should be changed to |
| 1705 | # "bn_sqr_words" i.e. remove the first dot | 1705 | # "bn_sqr_word_wise" i.e. remove the first dot |
| 1706 | # for the gcc compiler. This should be automatically | 1706 | # for the gcc compiler. This should be automatically |
| 1707 | # done in the build | 1707 | # done in the build |
| 1708 | # | 1708 | # |
| 1709 | .align 4 | 1709 | .align 4 |
| 1710 | .bn_sqr_words: | 1710 | .bn_sqr_word_wise: |
| 1711 | # | 1711 | # |
| 1712 | # Optimized version of bn_sqr_words | 1712 | # Optimized version of bn_sqr_word_wise |
| 1713 | # | 1713 | # |
| 1714 | # void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | 1714 | # void bn_sqr_word_wise(BN_ULONG *r, BN_ULONG *a, int n) |
| 1715 | # | 1715 | # |
| 1716 | # r3 = r | 1716 | # r3 = r |
| 1717 | # r4 = a | 1717 | # r4 = a |
| @@ -1740,15 +1740,15 @@ Lppcasm_sqr_adios: | |||
| 1740 | 1740 | ||
| 1741 | # | 1741 | # |
| 1742 | # NOTE: The following label name should be changed to | 1742 | # NOTE: The following label name should be changed to |
| 1743 | # "bn_mul_words" i.e. remove the first dot | 1743 | # "bn_mulw_words" i.e. remove the first dot |
| 1744 | # for the gcc compiler. This should be automatically | 1744 | # for the gcc compiler. This should be automatically |
| 1745 | # done in the build | 1745 | # done in the build |
| 1746 | # | 1746 | # |
| 1747 | 1747 | ||
| 1748 | .align 4 | 1748 | .align 4 |
| 1749 | .bn_mul_words: | 1749 | .bn_mulw_words: |
| 1750 | # | 1750 | # |
| 1751 | # BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 1751 | # BN_ULONG bn_mulw_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) |
| 1752 | # | 1752 | # |
| 1753 | # r3 = rp | 1753 | # r3 = rp |
| 1754 | # r4 = ap | 1754 | # r4 = ap |
| @@ -1842,15 +1842,15 @@ Lppcasm_mw_OVER: | |||
| 1842 | 1842 | ||
| 1843 | # | 1843 | # |
| 1844 | # NOTE: The following label name should be changed to | 1844 | # NOTE: The following label name should be changed to |
| 1845 | # "bn_mul_add_words" i.e. remove the first dot | 1845 | # "bn_mulw_add_words" i.e. remove the first dot |
| 1846 | # for the gcc compiler. This should be automatically | 1846 | # for the gcc compiler. This should be automatically |
| 1847 | # done in the build | 1847 | # done in the build |
| 1848 | # | 1848 | # |
| 1849 | 1849 | ||
| 1850 | .align 4 | 1850 | .align 4 |
| 1851 | .bn_mul_add_words: | 1851 | .bn_mulw_add_words: |
| 1852 | # | 1852 | # |
| 1853 | # BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 1853 | # BN_ULONG bn_mulw_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) |
| 1854 | # | 1854 | # |
| 1855 | # r3 = rp | 1855 | # r3 = rp |
| 1856 | # r4 = ap | 1856 | # r4 = ap |
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl index 6524651748..3be440f11f 100755 --- a/src/lib/libcrypto/bn/asm/x86-mont.pl +++ b/src/lib/libcrypto/bn/asm/x86-mont.pl | |||
| @@ -32,8 +32,7 @@ require "x86asm.pl"; | |||
| 32 | 32 | ||
| 33 | &asm_init($ARGV[0],$0); | 33 | &asm_init($ARGV[0],$0); |
| 34 | 34 | ||
| 35 | $sse2=0; | 35 | $sse2=1; |
| 36 | for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | ||
| 37 | 36 | ||
| 38 | &external_label("OPENSSL_ia32cap_P") if ($sse2); | 37 | &external_label("OPENSSL_ia32cap_P") if ($sse2); |
| 39 | 38 | ||
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h index 7c3c0b142f..3f9e24a868 100644 --- a/src/lib/libcrypto/bn/bn.h +++ b/src/lib/libcrypto/bn/bn.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn.h,v 1.80 2025/03/09 15:22:40 tb Exp $ */ | 1 | /* $OpenBSD: bn.h,v 1.85 2025/12/05 17:25:55 tb Exp $ */ |
| 2 | /* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -125,6 +125,8 @@ | |||
| 125 | #ifndef HEADER_BN_H | 125 | #ifndef HEADER_BN_H |
| 126 | #define HEADER_BN_H | 126 | #define HEADER_BN_H |
| 127 | 127 | ||
| 128 | #include <inttypes.h> | ||
| 129 | #include <stdint.h> | ||
| 128 | #include <stdio.h> | 130 | #include <stdio.h> |
| 129 | #include <stdlib.h> | 131 | #include <stdlib.h> |
| 130 | 132 | ||
| @@ -138,59 +140,17 @@ | |||
| 138 | extern "C" { | 140 | extern "C" { |
| 139 | #endif | 141 | #endif |
| 140 | 142 | ||
| 141 | /* This next option uses the C libraries (2 word)/(1 word) function. | 143 | #if defined(_LP64) || defined(_WIN64) |
| 142 | * If it is not defined, I use my C version (which is slower). | ||
| 143 | * The reason for this flag is that when the particular C compiler | ||
| 144 | * library routine is used, and the library is linked with a different | ||
| 145 | * compiler, the library is missing. This mostly happens when the | ||
| 146 | * library is built with gcc and then linked using normal cc. This would | ||
| 147 | * be a common occurrence because gcc normally produces code that is | ||
| 148 | * 2 times faster than system compilers for the big number stuff. | ||
| 149 | * For machines with only one compiler (or shared libraries), this should | ||
| 150 | * be on. Again this in only really a problem on machines | ||
| 151 | * using "long long's", are 32bit, and are not using my assembler code. */ | ||
| 152 | /* #define BN_DIV2W */ | ||
| 153 | |||
| 154 | #ifdef _LP64 | ||
| 155 | #undef BN_LLONG | 144 | #undef BN_LLONG |
| 156 | #define BN_ULONG unsigned long | 145 | #define BN_ULONG uint64_t |
| 157 | #define BN_LONG long | ||
| 158 | #define BN_BITS 128 | ||
| 159 | #define BN_BYTES 8 | 146 | #define BN_BYTES 8 |
| 160 | #define BN_BITS2 64 | 147 | #define BN_BITS2 64 |
| 161 | #define BN_BITS4 32 | ||
| 162 | #define BN_MASK2 (0xffffffffffffffffL) | ||
| 163 | #define BN_MASK2l (0xffffffffL) | ||
| 164 | #define BN_MASK2h (0xffffffff00000000L) | ||
| 165 | #define BN_MASK2h1 (0xffffffff80000000L) | ||
| 166 | #define BN_TBIT (0x8000000000000000L) | ||
| 167 | #define BN_DEC_CONV (10000000000000000000UL) | ||
| 168 | #define BN_DEC_FMT1 "%lu" | ||
| 169 | #define BN_DEC_FMT2 "%019lu" | ||
| 170 | #define BN_DEC_NUM 19 | ||
| 171 | #define BN_HEX_FMT1 "%lX" | ||
| 172 | #define BN_HEX_FMT2 "%016lX" | ||
| 173 | #else | 148 | #else |
| 174 | #define BN_ULLONG unsigned long long | 149 | #define BN_ULLONG uint64_t |
| 175 | #define BN_LLONG | 150 | #define BN_LLONG |
| 176 | #define BN_ULONG unsigned int | 151 | #define BN_ULONG uint32_t |
| 177 | #define BN_LONG int | ||
| 178 | #define BN_BITS 64 | ||
| 179 | #define BN_BYTES 4 | 152 | #define BN_BYTES 4 |
| 180 | #define BN_BITS2 32 | 153 | #define BN_BITS2 32 |
| 181 | #define BN_BITS4 16 | ||
| 182 | #define BN_MASK (0xffffffffffffffffLL) | ||
| 183 | #define BN_MASK2 (0xffffffffL) | ||
| 184 | #define BN_MASK2l (0xffff) | ||
| 185 | #define BN_MASK2h1 (0xffff8000L) | ||
| 186 | #define BN_MASK2h (0xffff0000L) | ||
| 187 | #define BN_TBIT (0x80000000L) | ||
| 188 | #define BN_DEC_CONV (1000000000L) | ||
| 189 | #define BN_DEC_FMT1 "%u" | ||
| 190 | #define BN_DEC_FMT2 "%09u" | ||
| 191 | #define BN_DEC_NUM 9 | ||
| 192 | #define BN_HEX_FMT1 "%X" | ||
| 193 | #define BN_HEX_FMT2 "%08X" | ||
| 194 | #endif | 154 | #endif |
| 195 | 155 | ||
| 196 | #define BN_FLG_MALLOCED 0x01 | 156 | #define BN_FLG_MALLOCED 0x01 |
diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c index 86768a312a..81fa60e429 100644 --- a/src/lib/libcrypto/bn/bn_add.c +++ b/src/lib/libcrypto/bn/bn_add.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_add.c,v 1.26 2023/07/08 12:21:58 beck Exp $ */ | 1 | /* $OpenBSD: bn_add.c,v 1.29 2025/05/25 04:53:05 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -60,44 +60,10 @@ | |||
| 60 | #include <limits.h> | 60 | #include <limits.h> |
| 61 | #include <stdio.h> | 61 | #include <stdio.h> |
| 62 | 62 | ||
| 63 | #include <openssl/err.h> | ||
| 64 | |||
| 65 | #include "bn_arch.h" | 63 | #include "bn_arch.h" |
| 66 | #include "bn_local.h" | 64 | #include "bn_local.h" |
| 67 | #include "bn_internal.h" | 65 | #include "bn_internal.h" |
| 68 | 66 | #include "err_local.h" | |
| 69 | /* | ||
| 70 | * bn_add_words() computes (carry:r[i]) = a[i] + b[i] + carry, where a and b | ||
| 71 | * are both arrays of words. Any carry resulting from the addition is returned. | ||
| 72 | */ | ||
| 73 | #ifndef HAVE_BN_ADD_WORDS | ||
| 74 | BN_ULONG | ||
| 75 | bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | ||
| 76 | { | ||
| 77 | BN_ULONG carry = 0; | ||
| 78 | |||
| 79 | assert(n >= 0); | ||
| 80 | if (n <= 0) | ||
| 81 | return 0; | ||
| 82 | |||
| 83 | while (n & ~3) { | ||
| 84 | bn_qwaddqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0], | ||
| 85 | carry, &carry, &r[3], &r[2], &r[1], &r[0]); | ||
| 86 | a += 4; | ||
| 87 | b += 4; | ||
| 88 | r += 4; | ||
| 89 | n -= 4; | ||
| 90 | } | ||
| 91 | while (n) { | ||
| 92 | bn_addw_addw(a[0], b[0], carry, &carry, &r[0]); | ||
| 93 | a++; | ||
| 94 | b++; | ||
| 95 | r++; | ||
| 96 | n--; | ||
| 97 | } | ||
| 98 | return carry; | ||
| 99 | } | ||
| 100 | #endif | ||
| 101 | 67 | ||
| 102 | /* | 68 | /* |
| 103 | * bn_add() computes (carry:r[i]) = a[i] + b[i] + carry, where a and b are both | 69 | * bn_add() computes (carry:r[i]) = a[i] + b[i] + carry, where a and b are both |
| @@ -147,40 +113,6 @@ bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | |||
| 147 | #endif | 113 | #endif |
| 148 | 114 | ||
| 149 | /* | 115 | /* |
| 150 | * bn_sub_words() computes (borrow:r[i]) = a[i] - b[i] - borrow, where a and b | ||
| 151 | * are both arrays of words. Any borrow resulting from the subtraction is | ||
| 152 | * returned. | ||
| 153 | */ | ||
| 154 | #ifndef HAVE_BN_SUB_WORDS | ||
| 155 | BN_ULONG | ||
| 156 | bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | ||
| 157 | { | ||
| 158 | BN_ULONG borrow = 0; | ||
| 159 | |||
| 160 | assert(n >= 0); | ||
| 161 | if (n <= 0) | ||
| 162 | return 0; | ||
| 163 | |||
| 164 | while (n & ~3) { | ||
| 165 | bn_qwsubqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0], | ||
| 166 | borrow, &borrow, &r[3], &r[2], &r[1], &r[0]); | ||
| 167 | a += 4; | ||
| 168 | b += 4; | ||
| 169 | r += 4; | ||
| 170 | n -= 4; | ||
| 171 | } | ||
| 172 | while (n) { | ||
| 173 | bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]); | ||
| 174 | a++; | ||
| 175 | b++; | ||
| 176 | r++; | ||
| 177 | n--; | ||
| 178 | } | ||
| 179 | return borrow; | ||
| 180 | } | ||
| 181 | #endif | ||
| 182 | |||
| 183 | /* | ||
| 184 | * bn_sub() computes (borrow:r[i]) = a[i] - b[i] - borrow, where a and b are both | 116 | * bn_sub() computes (borrow:r[i]) = a[i] - b[i] - borrow, where a and b are both |
| 185 | * arrays of words (r may be the same as a or b). The length of a and b may | 117 | * arrays of words (r may be the same as a or b). The length of a and b may |
| 186 | * differ, while r must be at least max(a_len, b_len) in length. Any borrow | 118 | * differ, while r must be at least max(a_len, b_len) in length. Any borrow |
| @@ -208,7 +140,7 @@ bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | |||
| 208 | /* XXX - consider doing four at a time to match bn_sub_words. */ | 140 | /* XXX - consider doing four at a time to match bn_sub_words. */ |
| 209 | while (diff_len < 0) { | 141 | while (diff_len < 0) { |
| 210 | /* Compute r[0] = 0 - b[0] - borrow. */ | 142 | /* Compute r[0] = 0 - b[0] - borrow. */ |
| 211 | bn_subw(0 - b[0], borrow, &borrow, &r[0]); | 143 | bn_subw_subw(0, b[0], borrow, &borrow, &r[0]); |
| 212 | diff_len++; | 144 | diff_len++; |
| 213 | b++; | 145 | b++; |
| 214 | r++; | 146 | r++; |
| @@ -217,7 +149,7 @@ bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | |||
| 217 | /* XXX - consider doing four at a time to match bn_sub_words. */ | 149 | /* XXX - consider doing four at a time to match bn_sub_words. */ |
| 218 | while (diff_len > 0) { | 150 | while (diff_len > 0) { |
| 219 | /* Compute r[0] = a[0] - 0 - borrow. */ | 151 | /* Compute r[0] = a[0] - 0 - borrow. */ |
| 220 | bn_subw(a[0], borrow, &borrow, &r[0]); | 152 | bn_subw_subw(a[0], 0, borrow, &borrow, &r[0]); |
| 221 | diff_len--; | 153 | diff_len--; |
| 222 | a++; | 154 | a++; |
| 223 | r++; | 155 | r++; |
diff --git a/src/lib/libcrypto/bn/bn_add_sub.c b/src/lib/libcrypto/bn/bn_add_sub.c new file mode 100644 index 0000000000..5c9d5a2b1a --- /dev/null +++ b/src/lib/libcrypto/bn/bn_add_sub.c | |||
| @@ -0,0 +1,178 @@ | |||
| 1 | /* $OpenBSD: bn_add_sub.c,v 1.1 2025/05/25 04:30:55 jsing Exp $ */ | ||
| 2 | /* | ||
| 3 | * Copyright (c) 2023,2024,2025 Joel Sing <jsing@openbsd.org> | ||
| 4 | * | ||
| 5 | * Permission to use, copy, modify, and distribute this software for any | ||
| 6 | * purpose with or without fee is hereby granted, provided that the above | ||
| 7 | * copyright notice and this permission notice appear in all copies. | ||
| 8 | * | ||
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <openssl/bn.h> | ||
| 19 | |||
| 20 | #include "bn_internal.h" | ||
| 21 | |||
| 22 | /* | ||
| 23 | * bn_add_words() computes (carry:r[i]) = a[i] + b[i] + carry, where a and b | ||
| 24 | * are both arrays of words. Any carry resulting from the addition is returned. | ||
| 25 | */ | ||
| 26 | #ifndef HAVE_BN_ADD_WORDS | ||
| 27 | BN_ULONG | ||
| 28 | bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | ||
| 29 | { | ||
| 30 | BN_ULONG carry = 0; | ||
| 31 | |||
| 32 | while (n >= 4) { | ||
| 33 | bn_qwaddqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0], | ||
| 34 | carry, &carry, &r[3], &r[2], &r[1], &r[0]); | ||
| 35 | a += 4; | ||
| 36 | b += 4; | ||
| 37 | r += 4; | ||
| 38 | n -= 4; | ||
| 39 | } | ||
| 40 | while (n > 0) { | ||
| 41 | bn_addw_addw(a[0], b[0], carry, &carry, &r[0]); | ||
| 42 | a++; | ||
| 43 | b++; | ||
| 44 | r++; | ||
| 45 | n--; | ||
| 46 | } | ||
| 47 | |||
| 48 | return carry; | ||
| 49 | } | ||
| 50 | #endif | ||
| 51 | |||
| 52 | /* | ||
| 53 | * bn_sub_words() computes (borrow:r[i]) = a[i] - b[i] - borrow, where a and b | ||
| 54 | * are both arrays of words. Any borrow resulting from the subtraction is | ||
| 55 | * returned. | ||
| 56 | */ | ||
| 57 | #ifndef HAVE_BN_SUB_WORDS | ||
| 58 | BN_ULONG | ||
| 59 | bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | ||
| 60 | { | ||
| 61 | BN_ULONG borrow = 0; | ||
| 62 | |||
| 63 | while (n >= 4) { | ||
| 64 | bn_qwsubqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0], | ||
| 65 | borrow, &borrow, &r[3], &r[2], &r[1], &r[0]); | ||
| 66 | a += 4; | ||
| 67 | b += 4; | ||
| 68 | r += 4; | ||
| 69 | n -= 4; | ||
| 70 | } | ||
| 71 | while (n > 0) { | ||
| 72 | bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]); | ||
| 73 | a++; | ||
| 74 | b++; | ||
| 75 | r++; | ||
| 76 | n--; | ||
| 77 | } | ||
| 78 | |||
| 79 | return borrow; | ||
| 80 | } | ||
| 81 | #endif | ||
| 82 | |||
| 83 | /* | ||
| 84 | * bn_sub_words_borrow() computes a[i] - b[i], returning the borrow only. | ||
| 85 | */ | ||
| 86 | #ifndef HAVE_BN_SUB_WORDS_BORROW | ||
| 87 | BN_ULONG | ||
| 88 | bn_sub_words_borrow(const BN_ULONG *a, const BN_ULONG *b, size_t n) | ||
| 89 | { | ||
| 90 | BN_ULONG borrow = 0; | ||
| 91 | BN_ULONG r; | ||
| 92 | |||
| 93 | while (n >= 4) { | ||
| 94 | bn_qwsubqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0], | ||
| 95 | borrow, &borrow, &r, &r, &r, &r); | ||
| 96 | a += 4; | ||
| 97 | b += 4; | ||
| 98 | n -= 4; | ||
| 99 | } | ||
| 100 | while (n > 0) { | ||
| 101 | bn_subw_subw(a[0], b[0], borrow, &borrow, &r); | ||
| 102 | a++; | ||
| 103 | b++; | ||
| 104 | n--; | ||
| 105 | } | ||
| 106 | |||
| 107 | return borrow; | ||
| 108 | } | ||
| 109 | #endif | ||
| 110 | |||
| 111 | /* | ||
| 112 | * bn_add_words_masked() computes r[] = a[] + (b[] & mask), where a, b and r are | ||
| 113 | * arrays of words with length n (r may be the same as a or b). | ||
| 114 | */ | ||
| 115 | #ifndef HAVE_BN_ADD_WORDS_MASKED | ||
| 116 | BN_ULONG | ||
| 117 | bn_add_words_masked(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 118 | BN_ULONG mask, size_t n) | ||
| 119 | { | ||
| 120 | BN_ULONG carry = 0; | ||
| 121 | |||
| 122 | /* XXX - consider conditional/masked versions of bn_addw_addw/bn_qwaddqw. */ | ||
| 123 | |||
| 124 | while (n >= 4) { | ||
| 125 | bn_qwaddqw(a[3], a[2], a[1], a[0], b[3] & mask, b[2] & mask, | ||
| 126 | b[1] & mask, b[0] & mask, carry, &carry, &r[3], &r[2], | ||
| 127 | &r[1], &r[0]); | ||
| 128 | a += 4; | ||
| 129 | b += 4; | ||
| 130 | r += 4; | ||
| 131 | n -= 4; | ||
| 132 | } | ||
| 133 | while (n > 0) { | ||
| 134 | bn_addw_addw(a[0], b[0] & mask, carry, &carry, &r[0]); | ||
| 135 | a++; | ||
| 136 | b++; | ||
| 137 | r++; | ||
| 138 | n--; | ||
| 139 | } | ||
| 140 | |||
| 141 | return carry; | ||
| 142 | } | ||
| 143 | #endif | ||
| 144 | |||
| 145 | /* | ||
| 146 | * bn_sub_words_masked() computes r[] = a[] - (b[] & mask), where a, b and r are | ||
| 147 | * arrays of words with length n (r may be the same as a or b). | ||
| 148 | */ | ||
| 149 | #ifndef HAVE_BN_SUB_WORDS_MASKED | ||
| 150 | BN_ULONG | ||
| 151 | bn_sub_words_masked(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 152 | BN_ULONG mask, size_t n) | ||
| 153 | { | ||
| 154 | BN_ULONG borrow = 0; | ||
| 155 | |||
| 156 | /* XXX - consider conditional/masked versions of bn_subw_subw/bn_qwsubqw. */ | ||
| 157 | |||
| 158 | /* Compute conditional r[i] = a[i] - b[i]. */ | ||
| 159 | while (n >= 4) { | ||
| 160 | bn_qwsubqw(a[3], a[2], a[1], a[0], b[3] & mask, b[2] & mask, | ||
| 161 | b[1] & mask, b[0] & mask, borrow, &borrow, &r[3], &r[2], | ||
| 162 | &r[1], &r[0]); | ||
| 163 | a += 4; | ||
| 164 | b += 4; | ||
| 165 | r += 4; | ||
| 166 | n -= 4; | ||
| 167 | } | ||
| 168 | while (n > 0) { | ||
| 169 | bn_subw_subw(a[0], b[0] & mask, borrow, &borrow, &r[0]); | ||
| 170 | a++; | ||
| 171 | b++; | ||
| 172 | r++; | ||
| 173 | n--; | ||
| 174 | } | ||
| 175 | |||
| 176 | return borrow; | ||
| 177 | } | ||
| 178 | #endif | ||
diff --git a/src/lib/libcrypto/bn/bn_const.c b/src/lib/libcrypto/bn/bn_const.c index bf684c8a46..389e95ca15 100644 --- a/src/lib/libcrypto/bn/bn_const.c +++ b/src/lib/libcrypto/bn/bn_const.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_const.c,v 1.8 2023/07/28 10:07:30 tb Exp $ */ | 1 | /* $OpenBSD: bn_const.c,v 1.9 2026/01/23 08:29:04 tb Exp $ */ |
| 2 | /* Insert boilerplate */ | 2 | /* Insert boilerplate */ |
| 3 | 3 | ||
| 4 | #include <openssl/bn.h> | 4 | #include <openssl/bn.h> |
| @@ -431,3 +431,295 @@ BN_get_rfc3526_prime_8192(BIGNUM *bn) | |||
| 431 | return BN_bin2bn(RFC3526_PRIME_8192, sizeof(RFC3526_PRIME_8192), bn); | 431 | return BN_bin2bn(RFC3526_PRIME_8192, sizeof(RFC3526_PRIME_8192), bn); |
| 432 | } | 432 | } |
| 433 | LCRYPTO_ALIAS(BN_get_rfc3526_prime_8192); | 433 | LCRYPTO_ALIAS(BN_get_rfc3526_prime_8192); |
| 434 | |||
| 435 | static const unsigned char RFC7919_PRIME_2048[] = { | ||
| 436 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAD, 0xF8, 0x54, 0x58, | ||
| 437 | 0xA2, 0xBB, 0x4A, 0x9A, 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, | ||
| 438 | 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, 0xA9, 0xE1, 0x36, 0x41, | ||
| 439 | 0x14, 0x64, 0x33, 0xFB, 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, | ||
| 440 | 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, 0xF6, 0x81, 0xB2, 0x02, | ||
| 441 | 0xAE, 0xC4, 0x61, 0x7A, 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, | ||
| 442 | 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, 0x85, 0x63, 0x65, 0x55, | ||
| 443 | 0x3D, 0xED, 0x1A, 0xF3, 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, | ||
| 444 | 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, 0xE2, 0xA6, 0x89, 0xDA, | ||
| 445 | 0xF3, 0xEF, 0xE8, 0x72, 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, | ||
| 446 | 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, 0xBC, 0x0A, 0xB1, 0x82, | ||
| 447 | 0xB3, 0x24, 0xFB, 0x61, 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, | ||
| 448 | 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, 0x1D, 0x4F, 0x42, 0xA3, | ||
| 449 | 0xDE, 0x39, 0x4D, 0xF4, 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, | ||
| 450 | 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, 0x9E, 0x02, 0xFC, 0xE1, | ||
| 451 | 0xCD, 0xF7, 0xE2, 0xEC, 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, | ||
| 452 | 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, 0x8E, 0x4F, 0x12, 0x32, | ||
| 453 | 0xEE, 0xF2, 0x81, 0x83, 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, | ||
| 454 | 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, 0xC5, 0x8E, 0xF1, 0x83, | ||
| 455 | 0x7D, 0x16, 0x83, 0xB2, 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, | ||
| 456 | 0x88, 0x6B, 0x42, 0x38, 0x61, 0x28, 0x5C, 0x97, 0xFF, 0xFF, 0xFF, 0xFF, | ||
| 457 | 0xFF, 0xFF, 0xFF, 0xFF, | ||
| 458 | }; | ||
| 459 | |||
| 460 | BIGNUM * | ||
| 461 | BN_get_rfc7919_prime_2048(BIGNUM *bn) | ||
| 462 | { | ||
| 463 | return BN_bin2bn(RFC7919_PRIME_2048, sizeof(RFC7919_PRIME_2048), bn); | ||
| 464 | } | ||
| 465 | |||
| 466 | static const unsigned char RFC7919_PRIME_3072[] = { | ||
| 467 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAD, 0xF8, 0x54, 0x58, | ||
| 468 | 0xA2, 0xBB, 0x4A, 0x9A, 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, | ||
| 469 | 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, 0xA9, 0xE1, 0x36, 0x41, | ||
| 470 | 0x14, 0x64, 0x33, 0xFB, 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, | ||
| 471 | 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, 0xF6, 0x81, 0xB2, 0x02, | ||
| 472 | 0xAE, 0xC4, 0x61, 0x7A, 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, | ||
| 473 | 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, 0x85, 0x63, 0x65, 0x55, | ||
| 474 | 0x3D, 0xED, 0x1A, 0xF3, 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, | ||
| 475 | 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, 0xE2, 0xA6, 0x89, 0xDA, | ||
| 476 | 0xF3, 0xEF, 0xE8, 0x72, 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, | ||
| 477 | 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, 0xBC, 0x0A, 0xB1, 0x82, | ||
| 478 | 0xB3, 0x24, 0xFB, 0x61, 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, | ||
| 479 | 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, 0x1D, 0x4F, 0x42, 0xA3, | ||
| 480 | 0xDE, 0x39, 0x4D, 0xF4, 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, | ||
| 481 | 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, 0x9E, 0x02, 0xFC, 0xE1, | ||
| 482 | 0xCD, 0xF7, 0xE2, 0xEC, 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, | ||
| 483 | 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, 0x8E, 0x4F, 0x12, 0x32, | ||
| 484 | 0xEE, 0xF2, 0x81, 0x83, 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, | ||
| 485 | 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, 0xC5, 0x8E, 0xF1, 0x83, | ||
| 486 | 0x7D, 0x16, 0x83, 0xB2, 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, | ||
| 487 | 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, 0xDE, 0x35, 0x5B, 0x3B, | ||
| 488 | 0x65, 0x19, 0x03, 0x5B, 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, | ||
| 489 | 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, 0x7A, 0xD9, 0x1D, 0x26, | ||
| 490 | 0x91, 0xF7, 0xF7, 0xEE, 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, | ||
| 491 | 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, 0xB4, 0x13, 0x0C, 0x93, | ||
| 492 | 0xBC, 0x43, 0x79, 0x44, 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, | ||
| 493 | 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, 0x5C, 0xAE, 0x82, 0xAB, | ||
| 494 | 0x9C, 0x9D, 0xF6, 0x9E, 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, | ||
| 495 | 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, 0x1D, 0xBF, 0x9A, 0x42, | ||
| 496 | 0xD5, 0xC4, 0x48, 0x4E, 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, | ||
| 497 | 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, 0x25, 0xE4, 0x1D, 0x2B, | ||
| 498 | 0x66, 0xC6, 0x2E, 0x37, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | ||
| 499 | }; | ||
| 500 | |||
| 501 | BIGNUM * | ||
| 502 | BN_get_rfc7919_prime_3072(BIGNUM *bn) | ||
| 503 | { | ||
| 504 | return BN_bin2bn(RFC7919_PRIME_3072, sizeof(RFC7919_PRIME_3072), bn); | ||
| 505 | } | ||
| 506 | |||
| 507 | static const unsigned char RFC7919_PRIME_4096[] = { | ||
| 508 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAD, 0xF8, 0x54, 0x58, | ||
| 509 | 0xA2, 0xBB, 0x4A, 0x9A, 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, | ||
| 510 | 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, 0xA9, 0xE1, 0x36, 0x41, | ||
| 511 | 0x14, 0x64, 0x33, 0xFB, 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, | ||
| 512 | 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, 0xF6, 0x81, 0xB2, 0x02, | ||
| 513 | 0xAE, 0xC4, 0x61, 0x7A, 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, | ||
| 514 | 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, 0x85, 0x63, 0x65, 0x55, | ||
| 515 | 0x3D, 0xED, 0x1A, 0xF3, 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, | ||
| 516 | 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, 0xE2, 0xA6, 0x89, 0xDA, | ||
| 517 | 0xF3, 0xEF, 0xE8, 0x72, 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, | ||
| 518 | 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, 0xBC, 0x0A, 0xB1, 0x82, | ||
| 519 | 0xB3, 0x24, 0xFB, 0x61, 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, | ||
| 520 | 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, 0x1D, 0x4F, 0x42, 0xA3, | ||
| 521 | 0xDE, 0x39, 0x4D, 0xF4, 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, | ||
| 522 | 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, 0x9E, 0x02, 0xFC, 0xE1, | ||
| 523 | 0xCD, 0xF7, 0xE2, 0xEC, 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, | ||
| 524 | 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, 0x8E, 0x4F, 0x12, 0x32, | ||
| 525 | 0xEE, 0xF2, 0x81, 0x83, 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, | ||
| 526 | 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, 0xC5, 0x8E, 0xF1, 0x83, | ||
| 527 | 0x7D, 0x16, 0x83, 0xB2, 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, | ||
| 528 | 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, 0xDE, 0x35, 0x5B, 0x3B, | ||
| 529 | 0x65, 0x19, 0x03, 0x5B, 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, | ||
| 530 | 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, 0x7A, 0xD9, 0x1D, 0x26, | ||
| 531 | 0x91, 0xF7, 0xF7, 0xEE, 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, | ||
| 532 | 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, 0xB4, 0x13, 0x0C, 0x93, | ||
| 533 | 0xBC, 0x43, 0x79, 0x44, 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, | ||
| 534 | 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, 0x5C, 0xAE, 0x82, 0xAB, | ||
| 535 | 0x9C, 0x9D, 0xF6, 0x9E, 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, | ||
| 536 | 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, 0x1D, 0xBF, 0x9A, 0x42, | ||
| 537 | 0xD5, 0xC4, 0x48, 0x4E, 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, | ||
| 538 | 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, 0x25, 0xE4, 0x1D, 0x2B, | ||
| 539 | 0x66, 0x9E, 0x1E, 0xF1, 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, | ||
| 540 | 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, 0xAC, 0x7D, 0x5F, 0x42, | ||
| 541 | 0xD6, 0x9F, 0x6D, 0x18, 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, | ||
| 542 | 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, 0x71, 0x35, 0xC8, 0x86, | ||
| 543 | 0xEF, 0xB4, 0x31, 0x8A, 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, | ||
| 544 | 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, 0x6D, 0xC7, 0x78, 0xF9, | ||
| 545 | 0x71, 0xAD, 0x00, 0x38, 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, | ||
| 546 | 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, 0x2A, 0x4E, 0xCE, 0xA9, | ||
| 547 | 0xF9, 0x8D, 0x0A, 0xCC, 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, | ||
| 548 | 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, 0x4D, 0xB5, 0xA8, 0x51, | ||
| 549 | 0xF4, 0x41, 0x82, 0xE1, 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x65, 0x5F, 0x6A, | ||
| 550 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | ||
| 551 | }; | ||
| 552 | |||
| 553 | BIGNUM * | ||
| 554 | BN_get_rfc7919_prime_4096(BIGNUM *bn) | ||
| 555 | { | ||
| 556 | return BN_bin2bn(RFC7919_PRIME_4096, sizeof(RFC7919_PRIME_4096), bn); | ||
| 557 | } | ||
| 558 | |||
| 559 | static const unsigned char RFC7919_PRIME_6144[] = { | ||
| 560 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAD, 0xF8, 0x54, 0x58, | ||
| 561 | 0xA2, 0xBB, 0x4A, 0x9A, 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, | ||
| 562 | 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, 0xA9, 0xE1, 0x36, 0x41, | ||
| 563 | 0x14, 0x64, 0x33, 0xFB, 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, | ||
| 564 | 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, 0xF6, 0x81, 0xB2, 0x02, | ||
| 565 | 0xAE, 0xC4, 0x61, 0x7A, 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, | ||
| 566 | 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, 0x85, 0x63, 0x65, 0x55, | ||
| 567 | 0x3D, 0xED, 0x1A, 0xF3, 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, | ||
| 568 | 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, 0xE2, 0xA6, 0x89, 0xDA, | ||
| 569 | 0xF3, 0xEF, 0xE8, 0x72, 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, | ||
| 570 | 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, 0xBC, 0x0A, 0xB1, 0x82, | ||
| 571 | 0xB3, 0x24, 0xFB, 0x61, 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, | ||
| 572 | 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, 0x1D, 0x4F, 0x42, 0xA3, | ||
| 573 | 0xDE, 0x39, 0x4D, 0xF4, 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, | ||
| 574 | 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, 0x9E, 0x02, 0xFC, 0xE1, | ||
| 575 | 0xCD, 0xF7, 0xE2, 0xEC, 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, | ||
| 576 | 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, 0x8E, 0x4F, 0x12, 0x32, | ||
| 577 | 0xEE, 0xF2, 0x81, 0x83, 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, | ||
| 578 | 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, 0xC5, 0x8E, 0xF1, 0x83, | ||
| 579 | 0x7D, 0x16, 0x83, 0xB2, 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, | ||
| 580 | 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, 0xDE, 0x35, 0x5B, 0x3B, | ||
| 581 | 0x65, 0x19, 0x03, 0x5B, 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, | ||
| 582 | 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, 0x7A, 0xD9, 0x1D, 0x26, | ||
| 583 | 0x91, 0xF7, 0xF7, 0xEE, 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, | ||
| 584 | 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, 0xB4, 0x13, 0x0C, 0x93, | ||
| 585 | 0xBC, 0x43, 0x79, 0x44, 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, | ||
| 586 | 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, 0x5C, 0xAE, 0x82, 0xAB, | ||
| 587 | 0x9C, 0x9D, 0xF6, 0x9E, 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, | ||
| 588 | 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, 0x1D, 0xBF, 0x9A, 0x42, | ||
| 589 | 0xD5, 0xC4, 0x48, 0x4E, 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, | ||
| 590 | 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, 0x25, 0xE4, 0x1D, 0x2B, | ||
| 591 | 0x66, 0x9E, 0x1E, 0xF1, 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, | ||
| 592 | 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, 0xAC, 0x7D, 0x5F, 0x42, | ||
| 593 | 0xD6, 0x9F, 0x6D, 0x18, 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, | ||
| 594 | 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, 0x71, 0x35, 0xC8, 0x86, | ||
| 595 | 0xEF, 0xB4, 0x31, 0x8A, 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, | ||
| 596 | 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, 0x6D, 0xC7, 0x78, 0xF9, | ||
| 597 | 0x71, 0xAD, 0x00, 0x38, 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, | ||
| 598 | 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, 0x2A, 0x4E, 0xCE, 0xA9, | ||
| 599 | 0xF9, 0x8D, 0x0A, 0xCC, 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, | ||
| 600 | 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, 0x4D, 0xB5, 0xA8, 0x51, | ||
| 601 | 0xF4, 0x41, 0x82, 0xE1, 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02, | ||
| 602 | 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A, 0x4E, 0x67, 0x7D, 0x2C, | ||
| 603 | 0x38, 0x53, 0x2A, 0x3A, 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6, | ||
| 604 | 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8, 0x91, 0x7B, 0xDD, 0x64, | ||
| 605 | 0xB1, 0xC0, 0xFD, 0x4C, 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A, | ||
| 606 | 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71, 0x9B, 0x1F, 0x5C, 0x3E, | ||
| 607 | 0x4E, 0x46, 0x04, 0x1F, 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77, | ||
| 608 | 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10, 0xB8, 0x55, 0x32, 0x2E, | ||
| 609 | 0xDB, 0x63, 0x40, 0xD8, 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3, | ||
| 610 | 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E, 0x7F, 0xB2, 0x9F, 0x8C, | ||
| 611 | 0x18, 0x30, 0x23, 0xC3, 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4, | ||
| 612 | 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1, 0x94, 0xC6, 0x65, 0x1E, | ||
| 613 | 0x77, 0xCA, 0xF9, 0x92, 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6, | ||
| 614 | 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82, 0x0A, 0xE8, 0xDB, 0x58, | ||
| 615 | 0x47, 0xA6, 0x7C, 0xBE, 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C, | ||
| 616 | 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E, 0x62, 0x29, 0x2C, 0x31, | ||
| 617 | 0x15, 0x62, 0xA8, 0x46, 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A, | ||
| 618 | 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17, 0x8C, 0xCF, 0x2D, 0xD5, | ||
| 619 | 0xCA, 0xCE, 0xF4, 0x03, 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04, | ||
| 620 | 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6, 0x3F, 0xDD, 0x4A, 0x8E, | ||
| 621 | 0x9A, 0xDB, 0x1E, 0x69, 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1, | ||
| 622 | 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4, 0xA4, 0x0E, 0x32, 0x9C, | ||
| 623 | 0xD0, 0xE4, 0x0E, 0x65, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | ||
| 624 | }; | ||
| 625 | |||
| 626 | BIGNUM * | ||
| 627 | BN_get_rfc7919_prime_6144(BIGNUM *bn) | ||
| 628 | { | ||
| 629 | return BN_bin2bn(RFC7919_PRIME_6144, sizeof(RFC7919_PRIME_6144), bn); | ||
| 630 | } | ||
| 631 | |||
| 632 | static const unsigned char RFC7919_PRIME_8192[] = { | ||
| 633 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAD, 0xF8, 0x54, 0x58, | ||
| 634 | 0xA2, 0xBB, 0x4A, 0x9A, 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, | ||
| 635 | 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, 0xA9, 0xE1, 0x36, 0x41, | ||
| 636 | 0x14, 0x64, 0x33, 0xFB, 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, | ||
| 637 | 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, 0xF6, 0x81, 0xB2, 0x02, | ||
| 638 | 0xAE, 0xC4, 0x61, 0x7A, 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, | ||
| 639 | 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, 0x85, 0x63, 0x65, 0x55, | ||
| 640 | 0x3D, 0xED, 0x1A, 0xF3, 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, | ||
| 641 | 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, 0xE2, 0xA6, 0x89, 0xDA, | ||
| 642 | 0xF3, 0xEF, 0xE8, 0x72, 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, | ||
| 643 | 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, 0xBC, 0x0A, 0xB1, 0x82, | ||
| 644 | 0xB3, 0x24, 0xFB, 0x61, 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, | ||
| 645 | 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, 0x1D, 0x4F, 0x42, 0xA3, | ||
| 646 | 0xDE, 0x39, 0x4D, 0xF4, 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, | ||
| 647 | 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, 0x9E, 0x02, 0xFC, 0xE1, | ||
| 648 | 0xCD, 0xF7, 0xE2, 0xEC, 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, | ||
| 649 | 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, 0x8E, 0x4F, 0x12, 0x32, | ||
| 650 | 0xEE, 0xF2, 0x81, 0x83, 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, | ||
| 651 | 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, 0xC5, 0x8E, 0xF1, 0x83, | ||
| 652 | 0x7D, 0x16, 0x83, 0xB2, 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, | ||
| 653 | 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, 0xDE, 0x35, 0x5B, 0x3B, | ||
| 654 | 0x65, 0x19, 0x03, 0x5B, 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, | ||
| 655 | 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, 0x7A, 0xD9, 0x1D, 0x26, | ||
| 656 | 0x91, 0xF7, 0xF7, 0xEE, 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, | ||
| 657 | 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, 0xB4, 0x13, 0x0C, 0x93, | ||
| 658 | 0xBC, 0x43, 0x79, 0x44, 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, | ||
| 659 | 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, 0x5C, 0xAE, 0x82, 0xAB, | ||
| 660 | 0x9C, 0x9D, 0xF6, 0x9E, 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, | ||
| 661 | 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, 0x1D, 0xBF, 0x9A, 0x42, | ||
| 662 | 0xD5, 0xC4, 0x48, 0x4E, 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, | ||
| 663 | 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, 0x25, 0xE4, 0x1D, 0x2B, | ||
| 664 | 0x66, 0x9E, 0x1E, 0xF1, 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, | ||
| 665 | 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, 0xAC, 0x7D, 0x5F, 0x42, | ||
| 666 | 0xD6, 0x9F, 0x6D, 0x18, 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, | ||
| 667 | 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, 0x71, 0x35, 0xC8, 0x86, | ||
| 668 | 0xEF, 0xB4, 0x31, 0x8A, 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, | ||
| 669 | 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, 0x6D, 0xC7, 0x78, 0xF9, | ||
| 670 | 0x71, 0xAD, 0x00, 0x38, 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, | ||
| 671 | 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, 0x2A, 0x4E, 0xCE, 0xA9, | ||
| 672 | 0xF9, 0x8D, 0x0A, 0xCC, 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, | ||
| 673 | 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, 0x4D, 0xB5, 0xA8, 0x51, | ||
| 674 | 0xF4, 0x41, 0x82, 0xE1, 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02, | ||
| 675 | 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A, 0x4E, 0x67, 0x7D, 0x2C, | ||
| 676 | 0x38, 0x53, 0x2A, 0x3A, 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6, | ||
| 677 | 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8, 0x91, 0x7B, 0xDD, 0x64, | ||
| 678 | 0xB1, 0xC0, 0xFD, 0x4C, 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A, | ||
| 679 | 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71, 0x9B, 0x1F, 0x5C, 0x3E, | ||
| 680 | 0x4E, 0x46, 0x04, 0x1F, 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77, | ||
| 681 | 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10, 0xB8, 0x55, 0x32, 0x2E, | ||
| 682 | 0xDB, 0x63, 0x40, 0xD8, 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3, | ||
| 683 | 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E, 0x7F, 0xB2, 0x9F, 0x8C, | ||
| 684 | 0x18, 0x30, 0x23, 0xC3, 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4, | ||
| 685 | 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1, 0x94, 0xC6, 0x65, 0x1E, | ||
| 686 | 0x77, 0xCA, 0xF9, 0x92, 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6, | ||
| 687 | 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82, 0x0A, 0xE8, 0xDB, 0x58, | ||
| 688 | 0x47, 0xA6, 0x7C, 0xBE, 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C, | ||
| 689 | 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E, 0x62, 0x29, 0x2C, 0x31, | ||
| 690 | 0x15, 0x62, 0xA8, 0x46, 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A, | ||
| 691 | 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17, 0x8C, 0xCF, 0x2D, 0xD5, | ||
| 692 | 0xCA, 0xCE, 0xF4, 0x03, 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04, | ||
| 693 | 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6, 0x3F, 0xDD, 0x4A, 0x8E, | ||
| 694 | 0x9A, 0xDB, 0x1E, 0x69, 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1, | ||
| 695 | 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4, 0xA4, 0x0E, 0x32, 0x9C, | ||
| 696 | 0xCF, 0xF4, 0x6A, 0xAA, 0x36, 0xAD, 0x00, 0x4C, 0xF6, 0x00, 0xC8, 0x38, | ||
| 697 | 0x1E, 0x42, 0x5A, 0x31, 0xD9, 0x51, 0xAE, 0x64, 0xFD, 0xB2, 0x3F, 0xCE, | ||
| 698 | 0xC9, 0x50, 0x9D, 0x43, 0x68, 0x7F, 0xEB, 0x69, 0xED, 0xD1, 0xCC, 0x5E, | ||
| 699 | 0x0B, 0x8C, 0xC3, 0xBD, 0xF6, 0x4B, 0x10, 0xEF, 0x86, 0xB6, 0x31, 0x42, | ||
| 700 | 0xA3, 0xAB, 0x88, 0x29, 0x55, 0x5B, 0x2F, 0x74, 0x7C, 0x93, 0x26, 0x65, | ||
| 701 | 0xCB, 0x2C, 0x0F, 0x1C, 0xC0, 0x1B, 0xD7, 0x02, 0x29, 0x38, 0x88, 0x39, | ||
| 702 | 0xD2, 0xAF, 0x05, 0xE4, 0x54, 0x50, 0x4A, 0xC7, 0x8B, 0x75, 0x82, 0x82, | ||
| 703 | 0x28, 0x46, 0xC0, 0xBA, 0x35, 0xC3, 0x5F, 0x5C, 0x59, 0x16, 0x0C, 0xC0, | ||
| 704 | 0x46, 0xFD, 0x82, 0x51, 0x54, 0x1F, 0xC6, 0x8C, 0x9C, 0x86, 0xB0, 0x22, | ||
| 705 | 0xBB, 0x70, 0x99, 0x87, 0x6A, 0x46, 0x0E, 0x74, 0x51, 0xA8, 0xA9, 0x31, | ||
| 706 | 0x09, 0x70, 0x3F, 0xEE, 0x1C, 0x21, 0x7E, 0x6C, 0x38, 0x26, 0xE5, 0x2C, | ||
| 707 | 0x51, 0xAA, 0x69, 0x1E, 0x0E, 0x42, 0x3C, 0xFC, 0x99, 0xE9, 0xE3, 0x16, | ||
| 708 | 0x50, 0xC1, 0x21, 0x7B, 0x62, 0x48, 0x16, 0xCD, 0xAD, 0x9A, 0x95, 0xF9, | ||
| 709 | 0xD5, 0xB8, 0x01, 0x94, 0x88, 0xD9, 0xC0, 0xA0, 0xA1, 0xFE, 0x30, 0x75, | ||
| 710 | 0xA5, 0x77, 0xE2, 0x31, 0x83, 0xF8, 0x1D, 0x4A, 0x3F, 0x2F, 0xA4, 0x57, | ||
| 711 | 0x1E, 0xFC, 0x8C, 0xE0, 0xBA, 0x8A, 0x4F, 0xE8, 0xB6, 0x85, 0x5D, 0xFE, | ||
| 712 | 0x72, 0xB0, 0xA6, 0x6E, 0xDE, 0xD2, 0xFB, 0xAB, 0xFB, 0xE5, 0x8A, 0x30, | ||
| 713 | 0xFA, 0xFA, 0xBE, 0x1C, 0x5D, 0x71, 0xA8, 0x7E, 0x2F, 0x74, 0x1E, 0xF8, | ||
| 714 | 0xC1, 0xFE, 0x86, 0xFE, 0xA6, 0xBB, 0xFD, 0xE5, 0x30, 0x67, 0x7F, 0x0D, | ||
| 715 | 0x97, 0xD1, 0x1D, 0x49, 0xF7, 0xA8, 0x44, 0x3D, 0x08, 0x22, 0xE5, 0x06, | ||
| 716 | 0xA9, 0xF4, 0x61, 0x4E, 0x01, 0x1E, 0x2A, 0x94, 0x83, 0x8F, 0xF8, 0x8C, | ||
| 717 | 0xD6, 0x8C, 0x8B, 0xB7, 0xC5, 0xC6, 0x42, 0x4C, 0xFF, 0xFF, 0xFF, 0xFF, | ||
| 718 | 0xFF, 0xFF, 0xFF, 0xFF, | ||
| 719 | }; | ||
| 720 | |||
| 721 | BIGNUM * | ||
| 722 | BN_get_rfc7919_prime_8192(BIGNUM *bn) | ||
| 723 | { | ||
| 724 | return BN_bin2bn(RFC7919_PRIME_8192, sizeof(RFC7919_PRIME_8192), bn); | ||
| 725 | } | ||
diff --git a/src/lib/libcrypto/bn/bn_convert.c b/src/lib/libcrypto/bn/bn_convert.c index 6a6354f44e..ab5bc519c8 100644 --- a/src/lib/libcrypto/bn/bn_convert.c +++ b/src/lib/libcrypto/bn/bn_convert.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_convert.c,v 1.23 2024/11/08 14:18:44 jsing Exp $ */ | 1 | /* $OpenBSD: bn_convert.c,v 1.25 2025/12/05 14:12:32 tb Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -65,11 +65,19 @@ | |||
| 65 | 65 | ||
| 66 | #include <openssl/bio.h> | 66 | #include <openssl/bio.h> |
| 67 | #include <openssl/buffer.h> | 67 | #include <openssl/buffer.h> |
| 68 | #include <openssl/err.h> | ||
| 69 | 68 | ||
| 70 | #include "bn_local.h" | 69 | #include "bn_local.h" |
| 71 | #include "bytestring.h" | 70 | #include "bytestring.h" |
| 72 | #include "crypto_internal.h" | 71 | #include "crypto_internal.h" |
| 72 | #include "err_local.h" | ||
| 73 | |||
| 74 | #if BN_BYTES == 8 | ||
| 75 | #define BN_DEC_CONV UINT64_C(10000000000000000000) | ||
| 76 | #define BN_DEC_NUM 19 | ||
| 77 | #else | ||
| 78 | #define BN_DEC_CONV UINT32_C(1000000000) | ||
| 79 | #define BN_DEC_NUM 9 | ||
| 80 | #endif | ||
| 73 | 81 | ||
| 74 | static int bn_dec2bn_cbs(BIGNUM **bnp, CBS *cbs); | 82 | static int bn_dec2bn_cbs(BIGNUM **bnp, CBS *cbs); |
| 75 | static int bn_hex2bn_cbs(BIGNUM **bnp, CBS *cbs); | 83 | static int bn_hex2bn_cbs(BIGNUM **bnp, CBS *cbs); |
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c index 129b9c9781..eda93dcaa4 100644 --- a/src/lib/libcrypto/bn/bn_ctx.c +++ b/src/lib/libcrypto/bn/bn_ctx.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_ctx.c,v 1.22 2023/07/08 12:21:58 beck Exp $ */ | 1 | /* $OpenBSD: bn_ctx.c,v 1.23 2025/05/10 05:54:38 tb Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -19,9 +19,9 @@ | |||
| 19 | #include <string.h> | 19 | #include <string.h> |
| 20 | 20 | ||
| 21 | #include <openssl/opensslconf.h> | 21 | #include <openssl/opensslconf.h> |
| 22 | #include <openssl/err.h> | ||
| 23 | 22 | ||
| 24 | #include "bn_local.h" | 23 | #include "bn_local.h" |
| 24 | #include "err_local.h" | ||
| 25 | 25 | ||
| 26 | #define BN_CTX_INITIAL_LEN 8 | 26 | #define BN_CTX_INITIAL_LEN 8 |
| 27 | 27 | ||
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c index 09a8a364df..0a914db752 100644 --- a/src/lib/libcrypto/bn/bn_div.c +++ b/src/lib/libcrypto/bn/bn_div.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_div.c,v 1.41 2024/04/10 14:58:06 beck Exp $ */ | 1 | /* $OpenBSD: bn_div.c,v 1.44 2025/09/07 06:28:03 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -62,25 +62,15 @@ | |||
| 62 | #include <openssl/opensslconf.h> | 62 | #include <openssl/opensslconf.h> |
| 63 | 63 | ||
| 64 | #include <openssl/bn.h> | 64 | #include <openssl/bn.h> |
| 65 | #include <openssl/err.h> | ||
| 66 | 65 | ||
| 67 | #include "bn_arch.h" | 66 | #include "bn_arch.h" |
| 68 | #include "bn_local.h" | 67 | #include "bn_local.h" |
| 69 | #include "bn_internal.h" | 68 | #include "bn_internal.h" |
| 69 | #include "err_local.h" | ||
| 70 | 70 | ||
| 71 | BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0); | 71 | BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0); |
| 72 | 72 | ||
| 73 | #ifndef HAVE_BN_DIV_WORDS | 73 | #ifndef HAVE_BN_DIV_WORDS |
| 74 | #if defined(BN_LLONG) && defined(BN_DIV2W) | ||
| 75 | |||
| 76 | BN_ULONG | ||
| 77 | bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | ||
| 78 | { | ||
| 79 | return ((BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2)|l)/(BN_ULLONG)d)); | ||
| 80 | } | ||
| 81 | |||
| 82 | #else | ||
| 83 | |||
| 84 | /* Divide h,l by d and return the result. */ | 74 | /* Divide h,l by d and return the result. */ |
| 85 | /* I need to test this some more :-( */ | 75 | /* I need to test this some more :-( */ |
| 86 | BN_ULONG | 76 | BN_ULONG |
| @@ -148,7 +138,6 @@ bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
| 148 | ret |= q; | 138 | ret |= q; |
| 149 | return (ret); | 139 | return (ret); |
| 150 | } | 140 | } |
| 151 | #endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ | ||
| 152 | #endif | 141 | #endif |
| 153 | 142 | ||
| 154 | /* | 143 | /* |
| @@ -375,7 +364,7 @@ BN_div_internal(BIGNUM *quotient, BIGNUM *remainder, const BIGNUM *numerator, | |||
| 375 | * | wnum - sdiv * q | < sdiv | 364 | * | wnum - sdiv * q | < sdiv |
| 376 | */ | 365 | */ |
| 377 | q = bn_div_3_words(wnump, d1, d0); | 366 | q = bn_div_3_words(wnump, d1, d0); |
| 378 | l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q); | 367 | l0 = bn_mulw_words(tmp->d, sdiv->d, div_n, q); |
| 379 | tmp->d[div_n] = l0; | 368 | tmp->d[div_n] = l0; |
| 380 | wnum.d--; | 369 | wnum.d--; |
| 381 | 370 | ||
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c index e925d325d2..6a5c1c857a 100644 --- a/src/lib/libcrypto/bn/bn_exp.c +++ b/src/lib/libcrypto/bn/bn_exp.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_exp.c,v 1.58 2025/02/13 11:15:09 tb Exp $ */ | 1 | /* $OpenBSD: bn_exp.c,v 1.59 2025/05/10 05:54:38 tb Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -112,10 +112,9 @@ | |||
| 112 | #include <stdlib.h> | 112 | #include <stdlib.h> |
| 113 | #include <string.h> | 113 | #include <string.h> |
| 114 | 114 | ||
| 115 | #include <openssl/err.h> | ||
| 116 | |||
| 117 | #include "bn_local.h" | 115 | #include "bn_local.h" |
| 118 | #include "constant_time.h" | 116 | #include "constant_time.h" |
| 117 | #include "err_local.h" | ||
| 119 | 118 | ||
| 120 | /* maximum precomputation table size for *variable* sliding windows */ | 119 | /* maximum precomputation table size for *variable* sliding windows */ |
| 121 | #define TABLE_SIZE 32 | 120 | #define TABLE_SIZE 32 |
diff --git a/src/lib/libcrypto/bn/bn_gcd.c b/src/lib/libcrypto/bn/bn_gcd.c index fa5d71a7f3..319d9ca390 100644 --- a/src/lib/libcrypto/bn/bn_gcd.c +++ b/src/lib/libcrypto/bn/bn_gcd.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_gcd.c,v 1.29 2024/04/10 14:58:06 beck Exp $ */ | 1 | /* $OpenBSD: bn_gcd.c,v 1.31 2025/06/02 12:40:10 tb Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -109,9 +109,8 @@ | |||
| 109 | * | 109 | * |
| 110 | */ | 110 | */ |
| 111 | 111 | ||
| 112 | #include <openssl/err.h> | ||
| 113 | |||
| 114 | #include "bn_local.h" | 112 | #include "bn_local.h" |
| 113 | #include "err_local.h" | ||
| 115 | 114 | ||
| 116 | static BIGNUM * | 115 | static BIGNUM * |
| 117 | euclid(BIGNUM *a, BIGNUM *b) | 116 | euclid(BIGNUM *a, BIGNUM *b) |
| @@ -681,8 +680,10 @@ BN_mod_inverse_internal(BIGNUM *in, const BIGNUM *a, const BIGNUM *n, BN_CTX *ct | |||
| 681 | /* A >= 2*B, so D=2 or D=3 */ | 680 | /* A >= 2*B, so D=2 or D=3 */ |
| 682 | if (!BN_sub(M, A, T)) | 681 | if (!BN_sub(M, A, T)) |
| 683 | goto err; | 682 | goto err; |
| 684 | if (!BN_add(D,T,B)) goto err; /* use D (:= 3*B) as temp */ | 683 | /* use D (:= 3*B) as temp */ |
| 685 | if (BN_ucmp(A, D) < 0) { | 684 | if (!BN_add(D, T, B)) |
| 685 | goto err; | ||
| 686 | if (BN_ucmp(A, D) < 0) { | ||
| 686 | /* A < 3*B, so D=2 */ | 687 | /* A < 3*B, so D=2 */ |
| 687 | if (!BN_set_word(D, 2)) | 688 | if (!BN_set_word(D, 2)) |
| 688 | goto err; | 689 | goto err; |
diff --git a/src/lib/libcrypto/bn/bn_internal.h b/src/lib/libcrypto/bn/bn_internal.h index fd04bc9f8a..efe8202aa0 100644 --- a/src/lib/libcrypto/bn/bn_internal.h +++ b/src/lib/libcrypto/bn/bn_internal.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_internal.h,v 1.15 2023/06/25 11:42:26 jsing Exp $ */ | 1 | /* $OpenBSD: bn_internal.h,v 1.21 2025/12/05 14:12:32 tb Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <openssl/bn.h> | 18 | #include <openssl/bn.h> |
| 19 | 19 | ||
| 20 | #include "bn_arch.h" | 20 | #include "bn_arch.h" |
| 21 | #include "bn_local.h" | ||
| 21 | 22 | ||
| 22 | #ifndef HEADER_BN_INTERNAL_H | 23 | #ifndef HEADER_BN_INTERNAL_H |
| 23 | #define HEADER_BN_INTERNAL_H | 24 | #define HEADER_BN_INTERNAL_H |
| @@ -26,6 +27,30 @@ int bn_word_clz(BN_ULONG w); | |||
| 26 | 27 | ||
| 27 | int bn_bitsize(const BIGNUM *bn); | 28 | int bn_bitsize(const BIGNUM *bn); |
| 28 | 29 | ||
| 30 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 31 | int num); | ||
| 32 | BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 33 | int num); | ||
| 34 | BN_ULONG bn_sub_words_borrow(const BN_ULONG *a, const BN_ULONG *b, size_t n); | ||
| 35 | BN_ULONG bn_add_words_masked(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 36 | BN_ULONG mask, size_t n); | ||
| 37 | BN_ULONG bn_sub_words_masked(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 38 | BN_ULONG mask, size_t n); | ||
| 39 | void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 40 | const BN_ULONG *m, size_t n); | ||
| 41 | void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 42 | const BN_ULONG *m, size_t n); | ||
| 43 | void bn_mod_mul_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 44 | const BN_ULONG *m, BN_ULONG *t, BN_ULONG m0, size_t n); | ||
| 45 | void bn_mod_sqr_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *m, | ||
| 46 | BN_ULONG *t, BN_ULONG m0, size_t n); | ||
| 47 | |||
| 48 | void bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, | ||
| 49 | const BN_ULONG *bp, const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, | ||
| 50 | int n_len); | ||
| 51 | void bn_montgomery_reduce_words(BN_ULONG *r, BN_ULONG *a, const BN_ULONG *n, | ||
| 52 | BN_ULONG n0, int n_len); | ||
| 53 | |||
| 29 | #ifndef HAVE_BN_CT_NE_ZERO | 54 | #ifndef HAVE_BN_CT_NE_ZERO |
| 30 | static inline int | 55 | static inline int |
| 31 | bn_ct_ne_zero(BN_ULONG w) | 56 | bn_ct_ne_zero(BN_ULONG w) |
diff --git a/src/lib/libcrypto/bn/bn_isqrt.c b/src/lib/libcrypto/bn/bn_isqrt.c index 018d5f34bd..b725519e1a 100644 --- a/src/lib/libcrypto/bn/bn_isqrt.c +++ b/src/lib/libcrypto/bn/bn_isqrt.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_isqrt.c,v 1.10 2023/06/04 17:28:35 tb Exp $ */ | 1 | /* $OpenBSD: bn_isqrt.c,v 1.11 2025/05/10 05:54:38 tb Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2022 Theo Buehler <tb@openbsd.org> | 3 | * Copyright (c) 2022 Theo Buehler <tb@openbsd.org> |
| 4 | * | 4 | * |
| @@ -19,10 +19,10 @@ | |||
| 19 | #include <stdint.h> | 19 | #include <stdint.h> |
| 20 | 20 | ||
| 21 | #include <openssl/bn.h> | 21 | #include <openssl/bn.h> |
| 22 | #include <openssl/err.h> | ||
| 23 | 22 | ||
| 24 | #include "bn_local.h" | 23 | #include "bn_local.h" |
| 25 | #include "crypto_internal.h" | 24 | #include "crypto_internal.h" |
| 25 | #include "err_local.h" | ||
| 26 | 26 | ||
| 27 | /* | 27 | /* |
| 28 | * Calculate integer square root of |n| using a variant of Newton's method. | 28 | * Calculate integer square root of |n| using a variant of Newton's method. |
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c index 72b988650c..0326e72c4d 100644 --- a/src/lib/libcrypto/bn/bn_lib.c +++ b/src/lib/libcrypto/bn/bn_lib.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_lib.c,v 1.93 2024/04/16 13:07:14 jsing Exp $ */ | 1 | /* $OpenBSD: bn_lib.c,v 1.95 2025/12/15 12:09:46 tb Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -63,10 +63,9 @@ | |||
| 63 | 63 | ||
| 64 | #include <openssl/opensslconf.h> | 64 | #include <openssl/opensslconf.h> |
| 65 | 65 | ||
| 66 | #include <openssl/err.h> | ||
| 67 | |||
| 68 | #include "bn_local.h" | 66 | #include "bn_local.h" |
| 69 | #include "bn_internal.h" | 67 | #include "bn_internal.h" |
| 68 | #include "err_local.h" | ||
| 70 | 69 | ||
| 71 | BIGNUM * | 70 | BIGNUM * |
| 72 | BN_new(void) | 71 | BN_new(void) |
| @@ -350,7 +349,7 @@ BN_ULONG | |||
| 350 | BN_get_word(const BIGNUM *a) | 349 | BN_get_word(const BIGNUM *a) |
| 351 | { | 350 | { |
| 352 | if (a->top > 1) | 351 | if (a->top > 1) |
| 353 | return BN_MASK2; | 352 | return (BN_ULONG)-1; |
| 354 | else if (a->top == 1) | 353 | else if (a->top == 1) |
| 355 | return a->d[0]; | 354 | return a->d[0]; |
| 356 | /* a->top == 0 */ | 355 | /* a->top == 0 */ |
diff --git a/src/lib/libcrypto/bn/bn_local.h b/src/lib/libcrypto/bn/bn_local.h index 067ffab3d9..2f5b58a548 100644 --- a/src/lib/libcrypto/bn/bn_local.h +++ b/src/lib/libcrypto/bn/bn_local.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_local.h,v 1.50 2025/02/13 11:04:20 tb Exp $ */ | 1 | /* $OpenBSD: bn_local.h,v 1.62 2026/01/23 08:29:04 tb Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -116,6 +116,20 @@ | |||
| 116 | 116 | ||
| 117 | #include <openssl/bn.h> | 117 | #include <openssl/bn.h> |
| 118 | 118 | ||
| 119 | #if BN_BYTES == 8 | ||
| 120 | #define BN_MASK2 UINT64_C(0xffffffffffffffff) | ||
| 121 | #define BN_MASK2l UINT64_C(0xffffffff) | ||
| 122 | #define BN_MASK2h UINT64_C(0xffffffff00000000) | ||
| 123 | #define BN_BITS 128 | ||
| 124 | #define BN_BITS4 32 | ||
| 125 | #else | ||
| 126 | #define BN_MASK2 UINT32_C(0xffffffff) | ||
| 127 | #define BN_MASK2l UINT32_C(0xffff) | ||
| 128 | #define BN_MASK2h UINT32_C(0xffff0000) | ||
| 129 | #define BN_BITS 64 | ||
| 130 | #define BN_BITS4 16 | ||
| 131 | #endif | ||
| 132 | |||
| 119 | __BEGIN_HIDDEN_DECLS | 133 | __BEGIN_HIDDEN_DECLS |
| 120 | 134 | ||
| 121 | struct bignum_st { | 135 | struct bignum_st { |
| @@ -239,12 +253,16 @@ BN_ULONG bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, | |||
| 239 | BN_ULONG bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, | 253 | BN_ULONG bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, |
| 240 | const BN_ULONG *b, int b_len); | 254 | const BN_ULONG *b, int b_len); |
| 241 | 255 | ||
| 242 | void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb); | 256 | void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b); |
| 243 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); | 257 | void bn_mul_comba6(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b); |
| 244 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); | 258 | void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b); |
| 259 | void bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int a_len, const BN_ULONG *b, | ||
| 260 | int b_len); | ||
| 245 | 261 | ||
| 246 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a); | 262 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a); |
| 263 | void bn_sqr_comba6(BN_ULONG *r, const BN_ULONG *a); | ||
| 247 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a); | 264 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a); |
| 265 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int a_len); | ||
| 248 | 266 | ||
| 249 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | 267 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
| 250 | const BN_ULONG *np, const BN_ULONG *n0, int num); | 268 | const BN_ULONG *np, const BN_ULONG *n0, int num); |
| @@ -254,13 +272,8 @@ int bn_expand_bits(BIGNUM *a, size_t bits); | |||
| 254 | int bn_expand_bytes(BIGNUM *a, size_t bytes); | 272 | int bn_expand_bytes(BIGNUM *a, size_t bytes); |
| 255 | int bn_wexpand(BIGNUM *a, int words); | 273 | int bn_wexpand(BIGNUM *a, int words); |
| 256 | 274 | ||
| 257 | BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | 275 | BN_ULONG bn_mulw_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); |
| 258 | int num); | 276 | BN_ULONG bn_mulw_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); |
| 259 | BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
| 260 | int num); | ||
| 261 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); | ||
| 262 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); | ||
| 263 | void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); | ||
| 264 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); | 277 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); |
| 265 | void bn_div_rem_words(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q, | 278 | void bn_div_rem_words(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q, |
| 266 | BN_ULONG *out_r); | 279 | BN_ULONG *out_r); |
| @@ -331,5 +344,11 @@ int bn_printf(BIO *bio, const BIGNUM *bn, int indent, const char *fmt, ...) | |||
| 331 | int bn_bn2hex_nosign(const BIGNUM *bn, char **out, size_t *out_len); | 344 | int bn_bn2hex_nosign(const BIGNUM *bn, char **out, size_t *out_len); |
| 332 | int bn_bn2hex_nibbles(const BIGNUM *bn, char **out, size_t *out_len); | 345 | int bn_bn2hex_nibbles(const BIGNUM *bn, char **out, size_t *out_len); |
| 333 | 346 | ||
| 347 | BIGNUM *BN_get_rfc7919_prime_2048(BIGNUM *bn); | ||
| 348 | BIGNUM *BN_get_rfc7919_prime_3072(BIGNUM *bn); | ||
| 349 | BIGNUM *BN_get_rfc7919_prime_4096(BIGNUM *bn); | ||
| 350 | BIGNUM *BN_get_rfc7919_prime_6144(BIGNUM *bn); | ||
| 351 | BIGNUM *BN_get_rfc7919_prime_8192(BIGNUM *bn); | ||
| 352 | |||
| 334 | __END_HIDDEN_DECLS | 353 | __END_HIDDEN_DECLS |
| 335 | #endif /* !HEADER_BN_LOCAL_H */ | 354 | #endif /* !HEADER_BN_LOCAL_H */ |
diff --git a/src/lib/libcrypto/bn/bn_mod.c b/src/lib/libcrypto/bn/bn_mod.c index 365f6fcf03..7198c02e3b 100644 --- a/src/lib/libcrypto/bn/bn_mod.c +++ b/src/lib/libcrypto/bn/bn_mod.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_mod.c,v 1.22 2023/07/08 12:21:58 beck Exp $ */ | 1 | /* $OpenBSD: bn_mod.c,v 1.23 2025/05/10 05:54:38 tb Exp $ */ |
| 2 | /* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de> | 2 | /* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de> |
| 3 | * for the OpenSSL project. */ | 3 | * for the OpenSSL project. */ |
| 4 | /* ==================================================================== | 4 | /* ==================================================================== |
| @@ -111,9 +111,8 @@ | |||
| 111 | * [including the GNU Public Licence.] | 111 | * [including the GNU Public Licence.] |
| 112 | */ | 112 | */ |
| 113 | 113 | ||
| 114 | #include <openssl/err.h> | ||
| 115 | |||
| 116 | #include "bn_local.h" | 114 | #include "bn_local.h" |
| 115 | #include "err_local.h" | ||
| 117 | 116 | ||
| 118 | int | 117 | int |
| 119 | BN_mod_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) | 118 | BN_mod_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) |
diff --git a/src/lib/libcrypto/bn/bn_mod_sqrt.c b/src/lib/libcrypto/bn/bn_mod_sqrt.c index 280002cc48..fc55f84317 100644 --- a/src/lib/libcrypto/bn/bn_mod_sqrt.c +++ b/src/lib/libcrypto/bn/bn_mod_sqrt.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_mod_sqrt.c,v 1.3 2023/08/03 18:53:55 tb Exp $ */ | 1 | /* $OpenBSD: bn_mod_sqrt.c,v 1.4 2025/05/10 05:54:38 tb Exp $ */ |
| 2 | 2 | ||
| 3 | /* | 3 | /* |
| 4 | * Copyright (c) 2022 Theo Buehler <tb@openbsd.org> | 4 | * Copyright (c) 2022 Theo Buehler <tb@openbsd.org> |
| @@ -16,9 +16,8 @@ | |||
| 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <openssl/err.h> | ||
| 20 | |||
| 21 | #include "bn_local.h" | 19 | #include "bn_local.h" |
| 20 | #include "err_local.h" | ||
| 22 | 21 | ||
| 23 | /* | 22 | /* |
| 24 | * Tonelli-Shanks according to H. Cohen "A Course in Computational Algebraic | 23 | * Tonelli-Shanks according to H. Cohen "A Course in Computational Algebraic |
diff --git a/src/lib/libcrypto/bn/bn_mod_words.c b/src/lib/libcrypto/bn/bn_mod_words.c new file mode 100644 index 0000000000..f368e074db --- /dev/null +++ b/src/lib/libcrypto/bn/bn_mod_words.c | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | /* $OpenBSD: bn_mod_words.c,v 1.7 2025/09/07 05:21:29 jsing Exp $ */ | ||
| 2 | /* | ||
| 3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> | ||
| 4 | * | ||
| 5 | * Permission to use, copy, modify, and distribute this software for any | ||
| 6 | * purpose with or without fee is hereby granted, provided that the above | ||
| 7 | * copyright notice and this permission notice appear in all copies. | ||
| 8 | * | ||
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include "bn_local.h" | ||
| 19 | #include "bn_internal.h" | ||
| 20 | |||
| 21 | /* | ||
| 22 | * bn_mod_add_words() computes r[] = (a[] + b[]) mod m[], where a, b, r and | ||
| 23 | * m are arrays of words with length n (r may be the same as a or b). | ||
| 24 | */ | ||
| 25 | #ifndef HAVE_BN_MOD_ADD_WORDS | ||
| 26 | void | ||
| 27 | bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 28 | const BN_ULONG *m, size_t n) | ||
| 29 | { | ||
| 30 | BN_ULONG carry, mask; | ||
| 31 | |||
| 32 | /* | ||
| 33 | * Compute a + b, then compute r - m to determine if r >= m, considering | ||
| 34 | * any carry that resulted from the addition. Finally complete a | ||
| 35 | * conditional subtraction of r - m. | ||
| 36 | */ | ||
| 37 | /* XXX - change bn_add_words to use size_t. */ | ||
| 38 | carry = bn_add_words(r, a, b, n); | ||
| 39 | mask = ~(carry - bn_sub_words_borrow(r, m, n)); | ||
| 40 | bn_sub_words_masked(r, r, m, mask, n); | ||
| 41 | } | ||
| 42 | #endif | ||
| 43 | |||
| 44 | /* | ||
| 45 | * bn_mod_sub_words() computes r[] = (a[] - b[]) mod m[], where a, b, r and | ||
| 46 | * m are arrays of words with length n (r may be the same as a or b). | ||
| 47 | */ | ||
| 48 | #ifndef HAVE_BN_MOD_SUB_WORDS | ||
| 49 | void | ||
| 50 | bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 51 | const BN_ULONG *m, size_t n) | ||
| 52 | { | ||
| 53 | BN_ULONG borrow, mask; | ||
| 54 | |||
| 55 | /* | ||
| 56 | * Compute a - b, then complete a conditional addition of r + m | ||
| 57 | * based on the resulting borrow. | ||
| 58 | */ | ||
| 59 | /* XXX - change bn_sub_words to use size_t. */ | ||
| 60 | borrow = bn_sub_words(r, a, b, n); | ||
| 61 | mask = (0 - borrow); | ||
| 62 | bn_add_words_masked(r, r, m, mask, n); | ||
| 63 | } | ||
| 64 | #endif | ||
| 65 | |||
| 66 | /* | ||
| 67 | * bn_mod_mul_words() computes r[] = (a[] * b[]) mod m[], where a, b, r and | ||
| 68 | * m are arrays of words with length n (r may be the same as a or b) in the | ||
| 69 | * Montgomery domain. The result remains in the Montgomery domain. | ||
| 70 | */ | ||
| 71 | #ifndef HAVE_BN_MOD_MUL_WORDS | ||
| 72 | void | ||
| 73 | bn_mod_mul_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
| 74 | const BN_ULONG *m, BN_ULONG *t, BN_ULONG m0, size_t n) | ||
| 75 | { | ||
| 76 | if (n == 4) { | ||
| 77 | bn_mul_comba4(t, a, b); | ||
| 78 | } else if (n == 6) { | ||
| 79 | bn_mul_comba6(t, a, b); | ||
| 80 | } else if (n == 8) { | ||
| 81 | bn_mul_comba8(t, a, b); | ||
| 82 | } else { | ||
| 83 | bn_mul_words(t, a, n, b, n); | ||
| 84 | } | ||
| 85 | bn_montgomery_reduce_words(r, t, m, m0, n); | ||
| 86 | } | ||
| 87 | #endif | ||
| 88 | |||
| 89 | /* | ||
| 90 | * bn_mod_sqr_words() computes r[] = (a[] * a[]) mod m[], where a, r and | ||
| 91 | * m are arrays of words with length n (r may be the same as a) in the | ||
| 92 | * Montgomery domain. The result remains in the Montgomery domain. | ||
| 93 | */ | ||
| 94 | #ifndef HAVE_BN_MOD_SQR_WORDS | ||
| 95 | void | ||
| 96 | bn_mod_sqr_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *m, | ||
| 97 | BN_ULONG *t, BN_ULONG m0, size_t n) | ||
| 98 | { | ||
| 99 | if (n == 4) { | ||
| 100 | bn_sqr_comba4(t, a); | ||
| 101 | } else if (n == 6) { | ||
| 102 | bn_sqr_comba6(t, a); | ||
| 103 | } else if (n == 8) { | ||
| 104 | bn_sqr_comba8(t, a); | ||
| 105 | } else { | ||
| 106 | bn_sqr_words(t, a, n); | ||
| 107 | } | ||
| 108 | bn_montgomery_reduce_words(r, t, m, m0, n); | ||
| 109 | } | ||
| 110 | #endif | ||
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c index edd7bcd0c8..c9e95fb08b 100644 --- a/src/lib/libcrypto/bn/bn_mont.c +++ b/src/lib/libcrypto/bn/bn_mont.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_mont.c,v 1.66 2025/03/09 15:22:40 tb Exp $ */ | 1 | /* $OpenBSD: bn_mont.c,v 1.70 2025/08/30 07:54:27 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -116,6 +116,7 @@ | |||
| 116 | * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf | 116 | * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf |
| 117 | */ | 117 | */ |
| 118 | 118 | ||
| 119 | #include <limits.h> | ||
| 119 | #include <stdio.h> | 120 | #include <stdio.h> |
| 120 | #include <stdint.h> | 121 | #include <stdint.h> |
| 121 | #include <string.h> | 122 | #include <string.h> |
| @@ -214,7 +215,7 @@ BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) | |||
| 214 | goto err; | 215 | goto err; |
| 215 | mont->N.neg = 0; | 216 | mont->N.neg = 0; |
| 216 | mont->ri = ((BN_num_bits(mod) + BN_BITS2 - 1) / BN_BITS2) * BN_BITS2; | 217 | mont->ri = ((BN_num_bits(mod) + BN_BITS2 - 1) / BN_BITS2) * BN_BITS2; |
| 217 | if (mont->ri * 2 < mont->ri) | 218 | if (mont->ri > INT_MAX / 2) |
| 218 | goto err; | 219 | goto err; |
| 219 | 220 | ||
| 220 | /* | 221 | /* |
| @@ -316,6 +317,44 @@ BN_MONT_CTX_set_locked(BN_MONT_CTX **pmctx, int lock, const BIGNUM *mod, | |||
| 316 | LCRYPTO_ALIAS(BN_MONT_CTX_set_locked); | 317 | LCRYPTO_ALIAS(BN_MONT_CTX_set_locked); |
| 317 | 318 | ||
| 318 | /* | 319 | /* |
| 320 | * bn_montgomery_reduce_words() performs Montgomery reduction, reducing the input | ||
| 321 | * from its Montgomery form aR to a, returning the result in r. a must be twice | ||
| 322 | * the length of the modulus. Note that the input is mutated in the process of | ||
| 323 | * performing the reduction. | ||
| 324 | */ | ||
| 325 | void | ||
| 326 | bn_montgomery_reduce_words(BN_ULONG *r, BN_ULONG *a, const BN_ULONG *n, | ||
| 327 | BN_ULONG n0, int n_len) | ||
| 328 | { | ||
| 329 | BN_ULONG v, mask; | ||
| 330 | BN_ULONG carry = 0; | ||
| 331 | int i; | ||
| 332 | |||
| 333 | /* Add multiples of the modulus, so that it becomes divisible by R. */ | ||
| 334 | for (i = 0; i < n_len; i++) { | ||
| 335 | v = bn_mulw_add_words(&a[i], n, n_len, a[i] * n0); | ||
| 336 | bn_addw_addw(v, a[i + n_len], carry, &carry, &a[i + n_len]); | ||
| 337 | } | ||
| 338 | |||
| 339 | /* Divide by R (this is the equivalent of right shifting by n_len). */ | ||
| 340 | a = &a[n_len]; | ||
| 341 | |||
| 342 | /* | ||
| 343 | * The output is now in the range of [0, 2N). Attempt to reduce once by | ||
| 344 | * subtracting the modulus. If the reduction was necessary then the | ||
| 345 | * result is already in r, otherwise copy the value prior to reduction | ||
| 346 | * from the top half of a. | ||
| 347 | */ | ||
| 348 | mask = carry - bn_sub_words(r, a, n, n_len); | ||
| 349 | |||
| 350 | for (i = 0; i < n_len; i++) { | ||
| 351 | *r = (*r & ~mask) | (*a & mask); | ||
| 352 | r++; | ||
| 353 | a++; | ||
| 354 | } | ||
| 355 | } | ||
| 356 | |||
| 357 | /* | ||
| 319 | * bn_montgomery_reduce() performs Montgomery reduction, reducing the input | 358 | * bn_montgomery_reduce() performs Montgomery reduction, reducing the input |
| 320 | * from its Montgomery form aR to a, returning the result in r. Note that the | 359 | * from its Montgomery form aR to a, returning the result in r. Note that the |
| 321 | * input is mutated in the process of performing the reduction, destroying its | 360 | * input is mutated in the process of performing the reduction, destroying its |
| @@ -325,7 +364,6 @@ static int | |||
| 325 | bn_montgomery_reduce(BIGNUM *r, BIGNUM *a, BN_MONT_CTX *mctx) | 364 | bn_montgomery_reduce(BIGNUM *r, BIGNUM *a, BN_MONT_CTX *mctx) |
| 326 | { | 365 | { |
| 327 | BIGNUM *n; | 366 | BIGNUM *n; |
| 328 | BN_ULONG *ap, *rp, n0, v, carry, mask; | ||
| 329 | int i, max, n_len; | 367 | int i, max, n_len; |
| 330 | 368 | ||
| 331 | n = &mctx->N; | 369 | n = &mctx->N; |
| @@ -341,7 +379,8 @@ bn_montgomery_reduce(BIGNUM *r, BIGNUM *a, BN_MONT_CTX *mctx) | |||
| 341 | 379 | ||
| 342 | /* | 380 | /* |
| 343 | * Expand a to twice the length of the modulus, zero if necessary. | 381 | * Expand a to twice the length of the modulus, zero if necessary. |
| 344 | * XXX - make this a requirement of the caller. | 382 | * XXX - make this a requirement of the caller or use a temporary |
| 383 | * allocation. | ||
| 345 | */ | 384 | */ |
| 346 | if ((max = 2 * n_len) < n_len) | 385 | if ((max = 2 * n_len) < n_len) |
| 347 | return 0; | 386 | return 0; |
| @@ -350,33 +389,8 @@ bn_montgomery_reduce(BIGNUM *r, BIGNUM *a, BN_MONT_CTX *mctx) | |||
| 350 | for (i = a->top; i < max; i++) | 389 | for (i = a->top; i < max; i++) |
| 351 | a->d[i] = 0; | 390 | a->d[i] = 0; |
| 352 | 391 | ||
| 353 | carry = 0; | 392 | bn_montgomery_reduce_words(r->d, a->d, n->d, mctx->n0[0], n_len); |
| 354 | n0 = mctx->n0[0]; | ||
| 355 | 393 | ||
| 356 | /* Add multiples of the modulus, so that it becomes divisible by R. */ | ||
| 357 | for (i = 0; i < n_len; i++) { | ||
| 358 | v = bn_mul_add_words(&a->d[i], n->d, n_len, a->d[i] * n0); | ||
| 359 | bn_addw_addw(v, a->d[i + n_len], carry, &carry, | ||
| 360 | &a->d[i + n_len]); | ||
| 361 | } | ||
| 362 | |||
| 363 | /* Divide by R (this is the equivalent of right shifting by n_len). */ | ||
| 364 | ap = &a->d[n_len]; | ||
| 365 | |||
| 366 | /* | ||
| 367 | * The output is now in the range of [0, 2N). Attempt to reduce once by | ||
| 368 | * subtracting the modulus. If the reduction was necessary then the | ||
| 369 | * result is already in r, otherwise copy the value prior to reduction | ||
| 370 | * from the top half of a. | ||
| 371 | */ | ||
| 372 | mask = carry - bn_sub_words(r->d, ap, n->d, n_len); | ||
| 373 | |||
| 374 | rp = r->d; | ||
| 375 | for (i = 0; i < n_len; i++) { | ||
| 376 | *rp = (*rp & ~mask) | (*ap & mask); | ||
| 377 | rp++; | ||
| 378 | ap++; | ||
| 379 | } | ||
| 380 | r->top = n_len; | 394 | r->top = n_len; |
| 381 | 395 | ||
| 382 | bn_correct_top(r); | 396 | bn_correct_top(r); |
| @@ -417,7 +431,7 @@ bn_mod_mul_montgomery_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, | |||
| 417 | return ret; | 431 | return ret; |
| 418 | } | 432 | } |
| 419 | 433 | ||
| 420 | static void | 434 | static inline void |
| 421 | bn_montgomery_multiply_word(const BN_ULONG *ap, BN_ULONG b, const BN_ULONG *np, | 435 | bn_montgomery_multiply_word(const BN_ULONG *ap, BN_ULONG b, const BN_ULONG *np, |
| 422 | BN_ULONG *tp, BN_ULONG w, BN_ULONG *carry_a, BN_ULONG *carry_n, int n_len) | 436 | BN_ULONG *tp, BN_ULONG w, BN_ULONG *carry_a, BN_ULONG *carry_n, int n_len) |
| 423 | { | 437 | { |
| @@ -452,7 +466,7 @@ bn_montgomery_multiply_word(const BN_ULONG *ap, BN_ULONG b, const BN_ULONG *np, | |||
| 452 | * given word arrays. The caller must ensure that rp, ap, bp and np are all | 466 | * given word arrays. The caller must ensure that rp, ap, bp and np are all |
| 453 | * n_len words in length, while tp must be n_len * 2 + 2 words in length. | 467 | * n_len words in length, while tp must be n_len * 2 + 2 words in length. |
| 454 | */ | 468 | */ |
| 455 | static void | 469 | void |
| 456 | bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | 470 | bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
| 457 | const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, int n_len) | 471 | const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, int n_len) |
| 458 | { | 472 | { |
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index bdeb9b0fe8..7db0f61849 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_mul.c,v 1.39 2023/07/08 12:21:58 beck Exp $ */ | 1 | /* $OpenBSD: bn_mul.c,v 1.46 2025/09/01 15:39:59 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -57,6 +57,7 @@ | |||
| 57 | */ | 57 | */ |
| 58 | 58 | ||
| 59 | #include <assert.h> | 59 | #include <assert.h> |
| 60 | #include <limits.h> | ||
| 60 | #include <stdio.h> | 61 | #include <stdio.h> |
| 61 | #include <string.h> | 62 | #include <string.h> |
| 62 | 63 | ||
| @@ -73,7 +74,7 @@ | |||
| 73 | */ | 74 | */ |
| 74 | #ifndef HAVE_BN_MUL_COMBA4 | 75 | #ifndef HAVE_BN_MUL_COMBA4 |
| 75 | void | 76 | void |
| 76 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 77 | bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) |
| 77 | { | 78 | { |
| 78 | BN_ULONG c0, c1, c2; | 79 | BN_ULONG c0, c1, c2; |
| 79 | 80 | ||
| @@ -103,13 +104,73 @@ bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 103 | #endif | 104 | #endif |
| 104 | 105 | ||
| 105 | /* | 106 | /* |
| 107 | * bn_mul_comba6() computes r[] = a[] * b[] using Comba multiplication | ||
| 108 | * (https://everything2.com/title/Comba+multiplication), where a and b are both | ||
| 109 | * six word arrays, producing a 12 word array result. | ||
| 110 | */ | ||
| 111 | #ifndef HAVE_BN_MUL_COMBA6 | ||
| 112 | void | ||
| 113 | bn_mul_comba6(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) | ||
| 114 | { | ||
| 115 | BN_ULONG c0, c1, c2; | ||
| 116 | |||
| 117 | bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]); | ||
| 118 | |||
| 119 | bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0); | ||
| 120 | bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]); | ||
| 121 | |||
| 122 | bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0); | ||
| 123 | bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0); | ||
| 124 | bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]); | ||
| 125 | |||
| 126 | bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0); | ||
| 127 | bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0); | ||
| 128 | bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0); | ||
| 129 | bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]); | ||
| 130 | |||
| 131 | bn_mulw_addtw(a[4], b[0], 0, c2, c1, &c2, &c1, &c0); | ||
| 132 | bn_mulw_addtw(a[3], b[1], c2, c1, c0, &c2, &c1, &c0); | ||
| 133 | bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0); | ||
| 134 | bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &c0); | ||
| 135 | bn_mulw_addtw(a[0], b[4], c2, c1, c0, &c2, &c1, &r[4]); | ||
| 136 | |||
| 137 | bn_mulw_addtw(a[0], b[5], 0, c2, c1, &c2, &c1, &c0); | ||
| 138 | bn_mulw_addtw(a[1], b[4], c2, c1, c0, &c2, &c1, &c0); | ||
| 139 | bn_mulw_addtw(a[2], b[3], c2, c1, c0, &c2, &c1, &c0); | ||
| 140 | bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &c0); | ||
| 141 | bn_mulw_addtw(a[4], b[1], c2, c1, c0, &c2, &c1, &c0); | ||
| 142 | bn_mulw_addtw(a[5], b[0], c2, c1, c0, &c2, &c1, &r[5]); | ||
| 143 | |||
| 144 | bn_mulw_addtw(a[5], b[1], 0, c2, c1, &c2, &c1, &c0); | ||
| 145 | bn_mulw_addtw(a[4], b[2], c2, c1, c0, &c2, &c1, &c0); | ||
| 146 | bn_mulw_addtw(a[3], b[3], c2, c1, c0, &c2, &c1, &c0); | ||
| 147 | bn_mulw_addtw(a[2], b[4], c2, c1, c0, &c2, &c1, &c0); | ||
| 148 | bn_mulw_addtw(a[1], b[5], c2, c1, c0, &c2, &c1, &r[6]); | ||
| 149 | |||
| 150 | bn_mulw_addtw(a[2], b[5], 0, c2, c1, &c2, &c1, &c0); | ||
| 151 | bn_mulw_addtw(a[3], b[4], c2, c1, c0, &c2, &c1, &c0); | ||
| 152 | bn_mulw_addtw(a[4], b[3], c2, c1, c0, &c2, &c1, &c0); | ||
| 153 | bn_mulw_addtw(a[5], b[2], c2, c1, c0, &c2, &c1, &r[7]); | ||
| 154 | |||
| 155 | bn_mulw_addtw(a[5], b[3], 0, c2, c1, &c2, &c1, &c0); | ||
| 156 | bn_mulw_addtw(a[4], b[4], c2, c1, c0, &c2, &c1, &c0); | ||
| 157 | bn_mulw_addtw(a[3], b[5], c2, c1, c0, &c2, &c1, &r[8]); | ||
| 158 | |||
| 159 | bn_mulw_addtw(a[4], b[5], 0, c2, c1, &c2, &c1, &c0); | ||
| 160 | bn_mulw_addtw(a[5], b[4], c2, c1, c0, &c2, &c1, &r[9]); | ||
| 161 | |||
| 162 | bn_mulw_addtw(a[5], b[5], 0, c2, c1, &c2, &r[11], &r[10]); | ||
| 163 | } | ||
| 164 | #endif | ||
| 165 | |||
| 166 | /* | ||
| 106 | * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication | 167 | * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication |
| 107 | * (https://everything2.com/title/Comba+multiplication), where a and b are both | 168 | * (https://everything2.com/title/Comba+multiplication), where a and b are both |
| 108 | * eight word arrays, producing a 16 word array result. | 169 | * eight word arrays, producing a 16 word array result. |
| 109 | */ | 170 | */ |
| 110 | #ifndef HAVE_BN_MUL_COMBA8 | 171 | #ifndef HAVE_BN_MUL_COMBA8 |
| 111 | void | 172 | void |
| 112 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 173 | bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) |
| 113 | { | 174 | { |
| 114 | BN_ULONG c0, c1, c2; | 175 | BN_ULONG c0, c1, c2; |
| 115 | 176 | ||
| @@ -195,14 +256,13 @@ bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 195 | #endif | 256 | #endif |
| 196 | 257 | ||
| 197 | /* | 258 | /* |
| 198 | * bn_mul_words() computes (carry:r[i]) = a[i] * w + carry, where a is an array | 259 | * bn_mulw_words() computes (carry:r[i]) = a[i] * w + carry, where a is an array |
| 199 | * of words and w is a single word. This should really be called bn_mulw_words() | 260 | * of words and w is a single word. This is used as a step in the multiplication |
| 200 | * since only one input is an array. This is used as a step in the multiplication | ||
| 201 | * of word arrays. | 261 | * of word arrays. |
| 202 | */ | 262 | */ |
| 203 | #ifndef HAVE_BN_MUL_WORDS | 263 | #ifndef HAVE_BN_MULW_WORDS |
| 204 | BN_ULONG | 264 | BN_ULONG |
| 205 | bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) | 265 | bn_mulw_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) |
| 206 | { | 266 | { |
| 207 | BN_ULONG carry = 0; | 267 | BN_ULONG carry = 0; |
| 208 | 268 | ||
| @@ -228,14 +288,13 @@ bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) | |||
| 228 | #endif | 288 | #endif |
| 229 | 289 | ||
| 230 | /* | 290 | /* |
| 231 | * bn_mul_add_words() computes (carry:r[i]) = a[i] * w + r[i] + carry, where | 291 | * bn_mulw_add_words() computes (carry:r[i]) = a[i] * w + r[i] + carry, where |
| 232 | * a is an array of words and w is a single word. This should really be called | 292 | * a is an array of words and w is a single word. This is used as a step in the |
| 233 | * bn_mulw_add_words() since only one input is an array. This is used as a step | 293 | * multiplication of word arrays. |
| 234 | * in the multiplication of word arrays. | ||
| 235 | */ | 294 | */ |
| 236 | #ifndef HAVE_BN_MUL_ADD_WORDS | 295 | #ifndef HAVE_BN_MULW_ADD_WORDS |
| 237 | BN_ULONG | 296 | BN_ULONG |
| 238 | bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) | 297 | bn_mulw_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) |
| 239 | { | 298 | { |
| 240 | BN_ULONG carry = 0; | 299 | BN_ULONG carry = 0; |
| 241 | 300 | ||
| @@ -262,62 +321,60 @@ bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) | |||
| 262 | } | 321 | } |
| 263 | #endif | 322 | #endif |
| 264 | 323 | ||
| 324 | #ifndef HAVE_BN_MUL_WORDS | ||
| 265 | void | 325 | void |
| 266 | bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) | 326 | bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int a_len, const BN_ULONG *b, |
| 327 | int b_len) | ||
| 267 | { | 328 | { |
| 268 | BN_ULONG *rr; | 329 | BN_ULONG *rr; |
| 269 | 330 | ||
| 270 | 331 | if (a_len < b_len) { | |
| 271 | if (na < nb) { | ||
| 272 | int itmp; | 332 | int itmp; |
| 273 | BN_ULONG *ltmp; | 333 | const BN_ULONG *ltmp; |
| 274 | 334 | ||
| 275 | itmp = na; | 335 | itmp = a_len; |
| 276 | na = nb; | 336 | a_len = b_len; |
| 277 | nb = itmp; | 337 | b_len = itmp; |
| 278 | ltmp = a; | 338 | ltmp = a; |
| 279 | a = b; | 339 | a = b; |
| 280 | b = ltmp; | 340 | b = ltmp; |
| 281 | 341 | ||
| 282 | } | 342 | } |
| 283 | rr = &(r[na]); | 343 | rr = &(r[a_len]); |
| 284 | if (nb <= 0) { | 344 | if (b_len <= 0) { |
| 285 | (void)bn_mul_words(r, a, na, 0); | 345 | (void)bn_mulw_words(r, a, a_len, 0); |
| 286 | return; | 346 | return; |
| 287 | } else | 347 | } else |
| 288 | rr[0] = bn_mul_words(r, a, na, b[0]); | 348 | rr[0] = bn_mulw_words(r, a, a_len, b[0]); |
| 289 | 349 | ||
| 290 | for (;;) { | 350 | for (;;) { |
| 291 | if (--nb <= 0) | 351 | if (--b_len <= 0) |
| 292 | return; | 352 | return; |
| 293 | rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); | 353 | rr[1] = bn_mulw_add_words(&(r[1]), a, a_len, b[1]); |
| 294 | if (--nb <= 0) | 354 | if (--b_len <= 0) |
| 295 | return; | 355 | return; |
| 296 | rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); | 356 | rr[2] = bn_mulw_add_words(&(r[2]), a, a_len, b[2]); |
| 297 | if (--nb <= 0) | 357 | if (--b_len <= 0) |
| 298 | return; | 358 | return; |
| 299 | rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); | 359 | rr[3] = bn_mulw_add_words(&(r[3]), a, a_len, b[3]); |
| 300 | if (--nb <= 0) | 360 | if (--b_len <= 0) |
| 301 | return; | 361 | return; |
| 302 | rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); | 362 | rr[4] = bn_mulw_add_words(&(r[4]), a, a_len, b[4]); |
| 303 | rr += 4; | 363 | rr += 4; |
| 304 | r += 4; | 364 | r += 4; |
| 305 | b += 4; | 365 | b += 4; |
| 306 | } | 366 | } |
| 307 | } | 367 | } |
| 368 | #endif | ||
| 308 | 369 | ||
| 309 | 370 | static int | |
| 310 | #ifndef HAVE_BN_MUL | ||
| 311 | int | ||
| 312 | bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx) | 371 | bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx) |
| 313 | { | 372 | { |
| 314 | bn_mul_normal(r->d, a->d, a->top, b->d, b->top); | 373 | bn_mul_words(r->d, a->d, a->top, b->d, b->top); |
| 315 | 374 | ||
| 316 | return 1; | 375 | return 1; |
| 317 | } | 376 | } |
| 318 | 377 | ||
| 319 | #endif /* HAVE_BN_MUL */ | ||
| 320 | |||
| 321 | int | 378 | int |
| 322 | BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | 379 | BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) |
| 323 | { | 380 | { |
| @@ -338,14 +395,16 @@ BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | |||
| 338 | if (rr == NULL) | 395 | if (rr == NULL) |
| 339 | goto err; | 396 | goto err; |
| 340 | 397 | ||
| 341 | rn = a->top + b->top; | 398 | if (a->top > INT_MAX - b->top) |
| 342 | if (rn < a->top) | ||
| 343 | goto err; | 399 | goto err; |
| 400 | rn = a->top + b->top; | ||
| 344 | if (!bn_wexpand(rr, rn)) | 401 | if (!bn_wexpand(rr, rn)) |
| 345 | goto err; | 402 | goto err; |
| 346 | 403 | ||
| 347 | if (a->top == 4 && b->top == 4) { | 404 | if (a->top == 4 && b->top == 4) { |
| 348 | bn_mul_comba4(rr->d, a->d, b->d); | 405 | bn_mul_comba4(rr->d, a->d, b->d); |
| 406 | } else if (a->top == 6 && b->top == 6) { | ||
| 407 | bn_mul_comba6(rr->d, a->d, b->d); | ||
| 349 | } else if (a->top == 8 && b->top == 8) { | 408 | } else if (a->top == 8 && b->top == 8) { |
| 350 | bn_mul_comba8(rr->d, a->d, b->d); | 409 | bn_mul_comba8(rr->d, a->d, b->d); |
| 351 | } else { | 410 | } else { |
diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c index 5a4aa50bf1..3d7f18a8ea 100644 --- a/src/lib/libcrypto/bn/bn_prime.c +++ b/src/lib/libcrypto/bn/bn_prime.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_prime.c,v 1.34 2023/07/20 06:26:27 tb Exp $ */ | 1 | /* $OpenBSD: bn_prime.c,v 1.37 2025/11/08 16:27:33 tb Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -109,12 +109,12 @@ | |||
| 109 | * | 109 | * |
| 110 | */ | 110 | */ |
| 111 | 111 | ||
| 112 | #include <stdio.h> | 112 | #include <stddef.h> |
| 113 | #include <time.h> | ||
| 114 | 113 | ||
| 115 | #include <openssl/err.h> | 114 | #include <openssl/bn.h> |
| 116 | 115 | ||
| 117 | #include "bn_local.h" | 116 | #include "bn_local.h" |
| 117 | #include "err_local.h" | ||
| 118 | 118 | ||
| 119 | /* The quick sieve algorithm approach to weeding out primes is | 119 | /* The quick sieve algorithm approach to weeding out primes is |
| 120 | * Philip Zimmermann's, as implemented in PGP. I have had a read of | 120 | * Philip Zimmermann's, as implemented in PGP. I have had a read of |
| @@ -339,7 +339,7 @@ probable_prime_dh(BIGNUM *rnd, int bits, const BIGNUM *add, const BIGNUM *rem, | |||
| 339 | loop: | 339 | loop: |
| 340 | for (i = 1; i < NUMPRIMES; i++) { | 340 | for (i = 1; i < NUMPRIMES; i++) { |
| 341 | /* check that rnd is a prime */ | 341 | /* check that rnd is a prime */ |
| 342 | BN_LONG mod = BN_mod_word(rnd, primes[i]); | 342 | BN_ULONG mod = BN_mod_word(rnd, primes[i]); |
| 343 | if (mod == (BN_ULONG)-1) | 343 | if (mod == (BN_ULONG)-1) |
| 344 | goto err; | 344 | goto err; |
| 345 | if (mod <= 1) { | 345 | if (mod <= 1) { |
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c index 9cfcd8e2c0..d3b16f70a0 100644 --- a/src/lib/libcrypto/bn/bn_rand.c +++ b/src/lib/libcrypto/bn/bn_rand.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_rand.c,v 1.30 2024/03/16 20:42:33 tb Exp $ */ | 1 | /* $OpenBSD: bn_rand.c,v 1.31 2025/05/10 05:54:38 tb Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -115,9 +115,8 @@ | |||
| 115 | #include <string.h> | 115 | #include <string.h> |
| 116 | #include <time.h> | 116 | #include <time.h> |
| 117 | 117 | ||
| 118 | #include <openssl/err.h> | ||
| 119 | |||
| 120 | #include "bn_local.h" | 118 | #include "bn_local.h" |
| 119 | #include "err_local.h" | ||
| 121 | 120 | ||
| 122 | static int | 121 | static int |
| 123 | bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) | 122 | bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) |
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c index e3f22c52a9..ed5049b772 100644 --- a/src/lib/libcrypto/bn/bn_recp.c +++ b/src/lib/libcrypto/bn/bn_recp.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_recp.c,v 1.33 2025/02/04 20:22:20 tb Exp $ */ | 1 | /* $OpenBSD: bn_recp.c,v 1.34 2025/05/10 05:54:38 tb Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -58,9 +58,8 @@ | |||
| 58 | 58 | ||
| 59 | #include <stdio.h> | 59 | #include <stdio.h> |
| 60 | 60 | ||
| 61 | #include <openssl/err.h> | ||
| 62 | |||
| 63 | #include "bn_local.h" | 61 | #include "bn_local.h" |
| 62 | #include "err_local.h" | ||
| 64 | 63 | ||
| 65 | struct bn_recp_ctx_st { | 64 | struct bn_recp_ctx_st { |
| 66 | BIGNUM *N; /* the divisor */ | 65 | BIGNUM *N; /* the divisor */ |
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c index 12edc7c0a0..b9f73cc322 100644 --- a/src/lib/libcrypto/bn/bn_shift.c +++ b/src/lib/libcrypto/bn/bn_shift.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_shift.c,v 1.22 2023/07/08 12:21:58 beck Exp $ */ | 1 | /* $OpenBSD: bn_shift.c,v 1.23 2025/05/10 05:54:38 tb Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2022, 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2022, 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -16,9 +16,9 @@ | |||
| 16 | */ | 16 | */ |
| 17 | 17 | ||
| 18 | #include <openssl/bn.h> | 18 | #include <openssl/bn.h> |
| 19 | #include <openssl/err.h> | ||
| 20 | 19 | ||
| 21 | #include "bn_local.h" | 20 | #include "bn_local.h" |
| 21 | #include "err_local.h" | ||
| 22 | 22 | ||
| 23 | static inline int | 23 | static inline int |
| 24 | bn_lshift(BIGNUM *r, const BIGNUM *a, int n) | 24 | bn_lshift(BIGNUM *r, const BIGNUM *a, int n) |
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c index 0dbccbf85d..27e08bdf13 100644 --- a/src/lib/libcrypto/bn/bn_sqr.c +++ b/src/lib/libcrypto/bn/bn_sqr.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_sqr.c,v 1.36 2023/07/08 12:21:58 beck Exp $ */ | 1 | /* $OpenBSD: bn_sqr.c,v 1.42 2025/09/07 05:21:29 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -64,8 +64,6 @@ | |||
| 64 | #include "bn_local.h" | 64 | #include "bn_local.h" |
| 65 | #include "bn_internal.h" | 65 | #include "bn_internal.h" |
| 66 | 66 | ||
| 67 | int bn_sqr(BIGNUM *r, const BIGNUM *a, int max, BN_CTX *ctx); | ||
| 68 | |||
| 69 | /* | 67 | /* |
| 70 | * bn_sqr_comba4() computes r[] = a[] * a[] using Comba multiplication | 68 | * bn_sqr_comba4() computes r[] = a[] * a[] using Comba multiplication |
| 71 | * (https://everything2.com/title/Comba+multiplication), where a is a | 69 | * (https://everything2.com/title/Comba+multiplication), where a is a |
| @@ -97,6 +95,51 @@ bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | |||
| 97 | #endif | 95 | #endif |
| 98 | 96 | ||
| 99 | /* | 97 | /* |
| 98 | * bn_sqr_comba6() computes r[] = a[] * a[] using Comba multiplication | ||
| 99 | * (https://everything2.com/title/Comba+multiplication), where a is a | ||
| 100 | * six word array, producing a 12 word array result. | ||
| 101 | */ | ||
| 102 | #ifndef HAVE_BN_SQR_COMBA6 | ||
| 103 | void | ||
| 104 | bn_sqr_comba6(BN_ULONG *r, const BN_ULONG *a) | ||
| 105 | { | ||
| 106 | BN_ULONG c2, c1, c0; | ||
| 107 | |||
| 108 | bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]); | ||
| 109 | |||
| 110 | bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]); | ||
| 111 | |||
| 112 | bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0); | ||
| 113 | bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]); | ||
| 114 | |||
| 115 | bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0); | ||
| 116 | bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]); | ||
| 117 | |||
| 118 | bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0); | ||
| 119 | bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &c0); | ||
| 120 | bn_mul2_mulw_addtw(a[4], a[0], c2, c1, c0, &c2, &c1, &r[4]); | ||
| 121 | |||
| 122 | bn_mul2_mulw_addtw(a[5], a[0], 0, c2, c1, &c2, &c1, &c0); | ||
| 123 | bn_mul2_mulw_addtw(a[4], a[1], c2, c1, c0, &c2, &c1, &c0); | ||
| 124 | bn_mul2_mulw_addtw(a[3], a[2], c2, c1, c0, &c2, &c1, &r[5]); | ||
| 125 | |||
| 126 | bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &c1, &c0); | ||
| 127 | bn_mul2_mulw_addtw(a[4], a[2], c2, c1, c0, &c2, &c1, &c0); | ||
| 128 | bn_mul2_mulw_addtw(a[5], a[1], c2, c1, c0, &c2, &c1, &r[6]); | ||
| 129 | |||
| 130 | bn_mul2_mulw_addtw(a[5], a[2], 0, c2, c1, &c2, &c1, &c0); | ||
| 131 | bn_mul2_mulw_addtw(a[4], a[3], c2, c1, c0, &c2, &c1, &r[7]); | ||
| 132 | |||
| 133 | bn_mulw_addtw(a[4], a[4], 0, c2, c1, &c2, &c1, &c0); | ||
| 134 | bn_mul2_mulw_addtw(a[5], a[3], c2, c1, c0, &c2, &c1, &r[8]); | ||
| 135 | |||
| 136 | bn_mul2_mulw_addtw(a[5], a[4], 0, c2, c1, &c2, &c1, &r[9]); | ||
| 137 | |||
| 138 | bn_mulw_addtw(a[5], a[5], 0, c2, c1, &c2, &r[11], &r[10]); | ||
| 139 | } | ||
| 140 | #endif | ||
| 141 | |||
| 142 | /* | ||
| 100 | * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication | 143 | * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication |
| 101 | * (https://everything2.com/title/Comba+multiplication), where a is an | 144 | * (https://everything2.com/title/Comba+multiplication), where a is an |
| 102 | * eight word array, producing a 16 word array result. | 145 | * eight word array, producing a 16 word array result. |
| @@ -160,7 +203,7 @@ bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
| 160 | } | 203 | } |
| 161 | #endif | 204 | #endif |
| 162 | 205 | ||
| 163 | #ifndef HAVE_BN_SQR | 206 | #ifndef HAVE_BN_SQR_WORDS |
| 164 | /* | 207 | /* |
| 165 | * bn_sqr_add_words() computes (r[i*2+1]:r[i*2]) = (r[i*2+1]:r[i*2]) + a[i] * a[i]. | 208 | * bn_sqr_add_words() computes (r[i*2+1]:r[i*2]) = (r[i*2+1]:r[i*2]) + a[i] * a[i]. |
| 166 | */ | 209 | */ |
| @@ -197,12 +240,16 @@ bn_sqr_add_words(BN_ULONG *r, const BN_ULONG *a, int n) | |||
| 197 | } | 240 | } |
| 198 | } | 241 | } |
| 199 | 242 | ||
| 200 | static void | 243 | /* |
| 201 | bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len) | 244 | * bn_sqr_words() computes r[] = a[] * a[]. |
| 245 | */ | ||
| 246 | void | ||
| 247 | bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int a_len) | ||
| 202 | { | 248 | { |
| 203 | const BN_ULONG *ap; | 249 | const BN_ULONG *ap; |
| 204 | BN_ULONG *rp; | 250 | BN_ULONG *rp; |
| 205 | BN_ULONG w; | 251 | BN_ULONG w; |
| 252 | int r_len; | ||
| 206 | int n; | 253 | int n; |
| 207 | 254 | ||
| 208 | if (a_len <= 0) | 255 | if (a_len <= 0) |
| @@ -213,13 +260,14 @@ bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len) | |||
| 213 | ap++; | 260 | ap++; |
| 214 | 261 | ||
| 215 | rp = r; | 262 | rp = r; |
| 263 | r_len = a_len * 2; | ||
| 216 | rp[0] = rp[r_len - 1] = 0; | 264 | rp[0] = rp[r_len - 1] = 0; |
| 217 | rp++; | 265 | rp++; |
| 218 | 266 | ||
| 219 | /* Compute initial product - r[n:1] = a[n:1] * a[0] */ | 267 | /* Compute initial product - r[n:1] = a[n:1] * a[0] */ |
| 220 | n = a_len - 1; | 268 | n = a_len - 1; |
| 221 | if (n > 0) { | 269 | if (n > 0) { |
| 222 | rp[n] = bn_mul_words(rp, ap, n, w); | 270 | rp[n] = bn_mulw_words(rp, ap, n, w); |
| 223 | } | 271 | } |
| 224 | rp += 2; | 272 | rp += 2; |
| 225 | n--; | 273 | n--; |
| @@ -229,7 +277,7 @@ bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len) | |||
| 229 | w = ap[0]; | 277 | w = ap[0]; |
| 230 | ap++; | 278 | ap++; |
| 231 | 279 | ||
| 232 | rp[n] = bn_mul_add_words(rp, ap, n, w); | 280 | rp[n] = bn_mulw_add_words(rp, ap, n, w); |
| 233 | rp += 2; | 281 | rp += 2; |
| 234 | n--; | 282 | n--; |
| 235 | } | 283 | } |
| @@ -240,20 +288,20 @@ bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len) | |||
| 240 | /* Add squares. */ | 288 | /* Add squares. */ |
| 241 | bn_sqr_add_words(r, a, a_len); | 289 | bn_sqr_add_words(r, a, a_len); |
| 242 | } | 290 | } |
| 291 | #endif | ||
| 243 | 292 | ||
| 244 | /* | 293 | /* |
| 245 | * bn_sqr() computes a * a, storing the result in r. The caller must ensure that | 294 | * bn_sqr() computes a * a, storing the result in r. The caller must ensure that |
| 246 | * r is not the same BIGNUM as a and that r has been expanded to rn = a->top * 2 | 295 | * r is not the same BIGNUM as a and that r has been expanded to rn = a->top * 2 |
| 247 | * words. | 296 | * words. |
| 248 | */ | 297 | */ |
| 249 | int | 298 | static int |
| 250 | bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) | 299 | bn_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) |
| 251 | { | 300 | { |
| 252 | bn_sqr_normal(r->d, r_len, a->d, a->top); | 301 | bn_sqr_words(r->d, a->d, a->top); |
| 253 | 302 | ||
| 254 | return 1; | 303 | return 1; |
| 255 | } | 304 | } |
| 256 | #endif | ||
| 257 | 305 | ||
| 258 | int | 306 | int |
| 259 | BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) | 307 | BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) |
| @@ -281,10 +329,12 @@ BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) | |||
| 281 | 329 | ||
| 282 | if (a->top == 4) { | 330 | if (a->top == 4) { |
| 283 | bn_sqr_comba4(rr->d, a->d); | 331 | bn_sqr_comba4(rr->d, a->d); |
| 332 | } else if (a->top == 6) { | ||
| 333 | bn_sqr_comba6(rr->d, a->d); | ||
| 284 | } else if (a->top == 8) { | 334 | } else if (a->top == 8) { |
| 285 | bn_sqr_comba8(rr->d, a->d); | 335 | bn_sqr_comba8(rr->d, a->d); |
| 286 | } else { | 336 | } else { |
| 287 | if (!bn_sqr(rr, a, r_len, ctx)) | 337 | if (!bn_sqr(rr, a, ctx)) |
| 288 | goto err; | 338 | goto err; |
| 289 | } | 339 | } |
| 290 | 340 | ||
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c index a82b911e67..e035878cb9 100644 --- a/src/lib/libcrypto/bn/bn_word.c +++ b/src/lib/libcrypto/bn/bn_word.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_word.c,v 1.21 2023/07/08 12:21:58 beck Exp $ */ | 1 | /* $OpenBSD: bn_word.c,v 1.22 2025/08/30 07:54:27 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -232,7 +232,7 @@ BN_mul_word(BIGNUM *a, BN_ULONG w) | |||
| 232 | if (w == 0) | 232 | if (w == 0) |
| 233 | BN_zero(a); | 233 | BN_zero(a); |
| 234 | else { | 234 | else { |
| 235 | ll = bn_mul_words(a->d, a->d, a->top, w); | 235 | ll = bn_mulw_words(a->d, a->d, a->top, w); |
| 236 | if (ll) { | 236 | if (ll) { |
| 237 | if (!bn_wexpand(a, a->top + 1)) | 237 | if (!bn_wexpand(a, a->top + 1)) |
| 238 | return (0); | 238 | return (0); |
diff --git a/src/lib/libcrypto/bn/s2n_bignum.h b/src/lib/libcrypto/bn/s2n_bignum.h index ce6e8cdc94..7d77894cdc 100644 --- a/src/lib/libcrypto/bn/s2n_bignum.h +++ b/src/lib/libcrypto/bn/s2n_bignum.h | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: s2n_bignum.h,v 1.4 2025/08/12 10:01:37 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -34,182 +36,240 @@ | |||
| 34 | // throughput, generally offering higher performance there. | 36 | // throughput, generally offering higher performance there. |
| 35 | // ---------------------------------------------------------------------------- | 37 | // ---------------------------------------------------------------------------- |
| 36 | 38 | ||
| 39 | |||
| 40 | #if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__) | ||
| 41 | #define S2N_BIGNUM_STATIC | ||
| 42 | #else | ||
| 43 | #define S2N_BIGNUM_STATIC static | ||
| 44 | #endif | ||
| 45 | |||
| 37 | // Add, z := x + y | 46 | // Add, z := x + y |
| 38 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 47 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
| 39 | extern uint64_t bignum_add (uint64_t p, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 48 | extern uint64_t bignum_add (uint64_t p, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
| 40 | 49 | ||
| 41 | // Add modulo p_25519, z := (x + y) mod p_25519, assuming x and y reduced | 50 | // Add modulo p_25519, z := (x + y) mod p_25519, assuming x and y reduced |
| 42 | // Inputs x[4], y[4]; output z[4] | 51 | // Inputs x[4], y[4]; output z[4] |
| 43 | extern void bignum_add_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 52 | extern void bignum_add_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 44 | 53 | ||
| 45 | // Add modulo p_256, z := (x + y) mod p_256, assuming x and y reduced | 54 | // Add modulo p_256, z := (x + y) mod p_256, assuming x and y reduced |
| 46 | // Inputs x[4], y[4]; output z[4] | 55 | // Inputs x[4], y[4]; output z[4] |
| 47 | extern void bignum_add_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 56 | extern void bignum_add_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 48 | 57 | ||
| 49 | // Add modulo p_256k1, z := (x + y) mod p_256k1, assuming x and y reduced | 58 | // Add modulo p_256k1, z := (x + y) mod p_256k1, assuming x and y reduced |
| 50 | // Inputs x[4], y[4]; output z[4] | 59 | // Inputs x[4], y[4]; output z[4] |
| 51 | extern void bignum_add_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 60 | extern void bignum_add_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 52 | 61 | ||
| 53 | // Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced | 62 | // Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced |
| 54 | // Inputs x[6], y[6]; output z[6] | 63 | // Inputs x[6], y[6]; output z[6] |
| 55 | extern void bignum_add_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); | 64 | extern void bignum_add_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
| 56 | 65 | ||
| 57 | // Add modulo p_521, z := (x + y) mod p_521, assuming x and y reduced | 66 | // Add modulo p_521, z := (x + y) mod p_521, assuming x and y reduced |
| 58 | // Inputs x[9], y[9]; output z[9] | 67 | // Inputs x[9], y[9]; output z[9] |
| 59 | extern void bignum_add_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 68 | extern void bignum_add_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
| 69 | |||
| 70 | // Add modulo p_sm2, z := (x + y) mod p_sm2, assuming x and y reduced | ||
| 71 | // Inputs x[4], y[4]; output z[4] | ||
| 72 | extern void bignum_add_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); | ||
| 60 | 73 | ||
| 61 | // Compute "amontification" constant z :== 2^{128k} (congruent mod m) | 74 | // Compute "amontification" constant z :== 2^{128k} (congruent mod m) |
| 62 | // Input m[k]; output z[k]; temporary buffer t[>=k] | 75 | // Input m[k]; output z[k]; temporary buffer t[>=k] |
| 63 | extern void bignum_amontifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t); | 76 | extern void bignum_amontifier (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t *t); |
| 64 | 77 | ||
| 65 | // Almost-Montgomery multiply, z :== (x * y / 2^{64k}) (congruent mod m) | 78 | // Almost-Montgomery multiply, z :== (x * y / 2^{64k}) (congruent mod m) |
| 66 | // Inputs x[k], y[k], m[k]; output z[k] | 79 | // Inputs x[k], y[k], m[k]; output z[k] |
| 67 | extern void bignum_amontmul (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); | 80 | extern void bignum_amontmul (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m); |
| 68 | 81 | ||
| 69 | // Almost-Montgomery reduce, z :== (x' / 2^{64p}) (congruent mod m) | 82 | // Almost-Montgomery reduce, z :== (x' / 2^{64p}) (congruent mod m) |
| 70 | // Inputs x[n], m[k], p; output z[k] | 83 | // Inputs x[n], m[k], p; output z[k] |
| 71 | extern void bignum_amontredc (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t *m, uint64_t p); | 84 | extern void bignum_amontredc (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, const uint64_t *m, uint64_t p); |
| 72 | 85 | ||
| 73 | // Almost-Montgomery square, z :== (x^2 / 2^{64k}) (congruent mod m) | 86 | // Almost-Montgomery square, z :== (x^2 / 2^{64k}) (congruent mod m) |
| 74 | // Inputs x[k], m[k]; output z[k] | 87 | // Inputs x[k], m[k]; output z[k] |
| 75 | extern void bignum_amontsqr (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); | 88 | extern void bignum_amontsqr (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m); |
| 76 | 89 | ||
| 77 | // Convert 4-digit (256-bit) bignum to/from big-endian form | 90 | // Convert 4-digit (256-bit) bignum to/from big-endian form |
| 78 | // Input x[4]; output z[4] | 91 | // Input x[4]; output z[4] |
| 79 | extern void bignum_bigendian_4 (uint64_t z[static 4], uint64_t x[static 4]); | 92 | extern void bignum_bigendian_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 80 | 93 | ||
| 81 | // Convert 6-digit (384-bit) bignum to/from big-endian form | 94 | // Convert 6-digit (384-bit) bignum to/from big-endian form |
| 82 | // Input x[6]; output z[6] | 95 | // Input x[6]; output z[6] |
| 83 | extern void bignum_bigendian_6 (uint64_t z[static 6], uint64_t x[static 6]); | 96 | extern void bignum_bigendian_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 84 | 97 | ||
| 85 | // Select bitfield starting at bit n with length l <= 64 | 98 | // Select bitfield starting at bit n with length l <= 64 |
| 86 | // Inputs x[k], n, l; output function return | 99 | // Inputs x[k], n, l; output function return |
| 87 | extern uint64_t bignum_bitfield (uint64_t k, uint64_t *x, uint64_t n, uint64_t l); | 100 | extern uint64_t bignum_bitfield (uint64_t k, const uint64_t *x, uint64_t n, uint64_t l); |
| 88 | 101 | ||
| 89 | // Return size of bignum in bits | 102 | // Return size of bignum in bits |
| 90 | // Input x[k]; output function return | 103 | // Input x[k]; output function return |
| 91 | extern uint64_t bignum_bitsize (uint64_t k, uint64_t *x); | 104 | extern uint64_t bignum_bitsize (uint64_t k, const uint64_t *x); |
| 92 | 105 | ||
| 93 | // Divide by a single (nonzero) word, z := x / m and return x mod m | 106 | // Divide by a single (nonzero) word, z := x / m and return x mod m |
| 94 | // Inputs x[n], m; outputs function return (remainder) and z[k] | 107 | // Inputs x[n], m; outputs function return (remainder) and z[k] |
| 95 | extern uint64_t bignum_cdiv (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t m); | 108 | extern uint64_t bignum_cdiv (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t m); |
| 96 | 109 | ||
| 97 | // Divide by a single word, z := x / m when known to be exact | 110 | // Divide by a single word, z := x / m when known to be exact |
| 98 | // Inputs x[n], m; output z[k] | 111 | // Inputs x[n], m; output z[k] |
| 99 | extern void bignum_cdiv_exact (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t m); | 112 | extern void bignum_cdiv_exact (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t m); |
| 100 | 113 | ||
| 101 | // Count leading zero digits (64-bit words) | 114 | // Count leading zero digits (64-bit words) |
| 102 | // Input x[k]; output function return | 115 | // Input x[k]; output function return |
| 103 | extern uint64_t bignum_cld (uint64_t k, uint64_t *x); | 116 | extern uint64_t bignum_cld (uint64_t k, const uint64_t *x); |
| 104 | 117 | ||
| 105 | // Count leading zero bits | 118 | // Count leading zero bits |
| 106 | // Input x[k]; output function return | 119 | // Input x[k]; output function return |
| 107 | extern uint64_t bignum_clz (uint64_t k, uint64_t *x); | 120 | extern uint64_t bignum_clz (uint64_t k, const uint64_t *x); |
| 108 | 121 | ||
| 109 | // Multiply-add with single-word multiplier, z := z + c * y | 122 | // Multiply-add with single-word multiplier, z := z + c * y |
| 110 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 123 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
| 111 | extern uint64_t bignum_cmadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 124 | extern uint64_t bignum_cmadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, const uint64_t *y); |
| 112 | 125 | ||
| 113 | // Negated multiply-add with single-word multiplier, z := z - c * y | 126 | // Negated multiply-add with single-word multiplier, z := z - c * y |
| 114 | // Inputs c, y[n]; outputs function return (negative carry-out) and z[k] | 127 | // Inputs c, y[n]; outputs function return (negative carry-out) and z[k] |
| 115 | extern uint64_t bignum_cmnegadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 128 | extern uint64_t bignum_cmnegadd (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, const uint64_t *y); |
| 116 | 129 | ||
| 117 | // Find modulus of bignum w.r.t. single nonzero word m, returning x mod m | 130 | // Find modulus of bignum w.r.t. single nonzero word m, returning x mod m |
| 118 | // Input x[k], m; output function return | 131 | // Input x[k], m; output function return |
| 119 | extern uint64_t bignum_cmod (uint64_t k, uint64_t *x, uint64_t m); | 132 | extern uint64_t bignum_cmod (uint64_t k, const uint64_t *x, uint64_t m); |
| 120 | 133 | ||
| 121 | // Multiply by a single word, z := c * y | 134 | // Multiply by a single word, z := c * y |
| 122 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 135 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
| 123 | extern uint64_t bignum_cmul (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 136 | extern uint64_t bignum_cmul (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, const uint64_t *y); |
| 124 | 137 | ||
| 125 | // Multiply by a single word modulo p_25519, z := (c * x) mod p_25519, assuming x reduced | 138 | // Multiply by a single word modulo p_25519, z := (c * x) mod p_25519, assuming x reduced |
| 126 | // Inputs c, x[4]; output z[4] | 139 | // Inputs c, x[4]; output z[4] |
| 127 | extern void bignum_cmul_p25519 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 140 | extern void bignum_cmul_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 128 | extern void bignum_cmul_p25519_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 141 | extern void bignum_cmul_p25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 129 | 142 | ||
| 130 | // Multiply by a single word modulo p_256, z := (c * x) mod p_256, assuming x reduced | 143 | // Multiply by a single word modulo p_256, z := (c * x) mod p_256, assuming x reduced |
| 131 | // Inputs c, x[4]; output z[4] | 144 | // Inputs c, x[4]; output z[4] |
| 132 | extern void bignum_cmul_p256 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 145 | extern void bignum_cmul_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 133 | extern void bignum_cmul_p256_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 146 | extern void bignum_cmul_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 134 | 147 | ||
| 135 | // Multiply by a single word modulo p_256k1, z := (c * x) mod p_256k1, assuming x reduced | 148 | // Multiply by a single word modulo p_256k1, z := (c * x) mod p_256k1, assuming x reduced |
| 136 | // Inputs c, x[4]; output z[4] | 149 | // Inputs c, x[4]; output z[4] |
| 137 | extern void bignum_cmul_p256k1 (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 150 | extern void bignum_cmul_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 138 | extern void bignum_cmul_p256k1_alt (uint64_t z[static 4], uint64_t c, uint64_t x[static 4]); | 151 | extern void bignum_cmul_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 139 | 152 | ||
| 140 | // Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming x reduced | 153 | // Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming x reduced |
| 141 | // Inputs c, x[6]; output z[6] | 154 | // Inputs c, x[6]; output z[6] |
| 142 | extern void bignum_cmul_p384 (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]); | 155 | extern void bignum_cmul_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 143 | extern void bignum_cmul_p384_alt (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]); | 156 | extern void bignum_cmul_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 144 | 157 | ||
| 145 | // Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming x reduced | 158 | // Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming x reduced |
| 146 | // Inputs c, x[9]; output z[9] | 159 | // Inputs c, x[9]; output z[9] |
| 147 | extern void bignum_cmul_p521 (uint64_t z[static 9], uint64_t c, uint64_t x[static 9]); | 160 | extern void bignum_cmul_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 148 | extern void bignum_cmul_p521_alt (uint64_t z[static 9], uint64_t c, uint64_t x[static 9]); | 161 | extern void bignum_cmul_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 162 | |||
| 163 | // Multiply by a single word modulo p_sm2, z := (c * x) mod p_sm2, assuming x reduced | ||
| 164 | // Inputs c, x[4]; output z[4] | ||
| 165 | extern void bignum_cmul_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 166 | extern void bignum_cmul_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t c, const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 149 | 167 | ||
| 150 | // Test bignums for coprimality, gcd(x,y) = 1 | 168 | // Test bignums for coprimality, gcd(x,y) = 1 |
| 151 | // Inputs x[m], y[n]; output function return; temporary buffer t[>=2*max(m,n)] | 169 | // Inputs x[m], y[n]; output function return; temporary buffer t[>=2*max(m,n)] |
| 152 | extern uint64_t bignum_coprime (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y, uint64_t *t); | 170 | extern uint64_t bignum_coprime (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y, uint64_t *t); |
| 153 | 171 | ||
| 154 | // Copy bignum with zero-extension or truncation, z := x | 172 | // Copy bignum with zero-extension or truncation, z := x |
| 155 | // Input x[n]; output z[k] | 173 | // Input x[n]; output z[k] |
| 156 | extern void bignum_copy (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); | 174 | extern void bignum_copy (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x); |
| 175 | |||
| 176 | // Given table: uint64_t[height*width], copy table[idx*width...(idx+1)*width-1] | ||
| 177 | // into z[0..width-1]. | ||
| 178 | // This function is constant-time with respect to the value of `idx`. This is | ||
| 179 | // achieved by reading the whole table and using bit-masking to select the | ||
| 180 | // `idx`-th row. | ||
| 181 | // Input table[height*width]; output z[width] | ||
| 182 | extern void bignum_copy_row_from_table (uint64_t *z, const uint64_t *table, uint64_t height, | ||
| 183 | uint64_t width, uint64_t idx); | ||
| 184 | |||
| 185 | // Given table: uint64_t[height*width], copy table[idx*width...(idx+1)*width-1] | ||
| 186 | // into z[0..width-1]. width must be a multiple of 8. | ||
| 187 | // This function is constant-time with respect to the value of `idx`. This is | ||
| 188 | // achieved by reading the whole table and using bit-masking to select the | ||
| 189 | // `idx`-th row. | ||
| 190 | // Input table[height*width]; output z[width] | ||
| 191 | extern void bignum_copy_row_from_table_8n (uint64_t *z, const uint64_t *table, | ||
| 192 | uint64_t height, uint64_t width, uint64_t idx); | ||
| 193 | |||
| 194 | // Given table: uint64_t[height*16], copy table[idx*16...(idx+1)*16-1] into z[0..15]. | ||
| 195 | // This function is constant-time with respect to the value of `idx`. This is | ||
| 196 | // achieved by reading the whole table and using bit-masking to select the | ||
| 197 | // `idx`-th row. | ||
| 198 | // Input table[height*16]; output z[16] | ||
| 199 | extern void bignum_copy_row_from_table_16 (uint64_t *z, const uint64_t *table, | ||
| 200 | uint64_t height, uint64_t idx); | ||
| 201 | |||
| 202 | // Given table: uint64_t[height*32], copy table[idx*32...(idx+1)*32-1] into z[0..31]. | ||
| 203 | // This function is constant-time with respect to the value of `idx`. This is | ||
| 204 | // achieved by reading the whole table and using bit-masking to select the | ||
| 205 | // `idx`-th row. | ||
| 206 | // Input table[height*32]; output z[32] | ||
| 207 | extern void bignum_copy_row_from_table_32 (uint64_t *z, const uint64_t *table, | ||
| 208 | uint64_t height, uint64_t idx); | ||
| 157 | 209 | ||
| 158 | // Count trailing zero digits (64-bit words) | 210 | // Count trailing zero digits (64-bit words) |
| 159 | // Input x[k]; output function return | 211 | // Input x[k]; output function return |
| 160 | extern uint64_t bignum_ctd (uint64_t k, uint64_t *x); | 212 | extern uint64_t bignum_ctd (uint64_t k, const uint64_t *x); |
| 161 | 213 | ||
| 162 | // Count trailing zero bits | 214 | // Count trailing zero bits |
| 163 | // Input x[k]; output function return | 215 | // Input x[k]; output function return |
| 164 | extern uint64_t bignum_ctz (uint64_t k, uint64_t *x); | 216 | extern uint64_t bignum_ctz (uint64_t k, const uint64_t *x); |
| 165 | 217 | ||
| 166 | // Convert from almost-Montgomery form, z := (x / 2^256) mod p_256 | 218 | // Convert from almost-Montgomery form, z := (x / 2^256) mod p_256 |
| 167 | // Input x[4]; output z[4] | 219 | // Input x[4]; output z[4] |
| 168 | extern void bignum_deamont_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 220 | extern void bignum_deamont_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 169 | extern void bignum_deamont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); | 221 | extern void bignum_deamont_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 170 | 222 | ||
| 171 | // Convert from almost-Montgomery form, z := (x / 2^256) mod p_256k1 | 223 | // Convert from almost-Montgomery form, z := (x / 2^256) mod p_256k1 |
| 172 | // Input x[4]; output z[4] | 224 | // Input x[4]; output z[4] |
| 173 | extern void bignum_deamont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 225 | extern void bignum_deamont_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 174 | 226 | ||
| 175 | // Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 | 227 | // Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 |
| 176 | // Input x[6]; output z[6] | 228 | // Input x[6]; output z[6] |
| 177 | extern void bignum_deamont_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 229 | extern void bignum_deamont_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 178 | extern void bignum_deamont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); | 230 | extern void bignum_deamont_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 179 | 231 | ||
| 180 | // Convert from almost-Montgomery form z := (x / 2^576) mod p_521 | 232 | // Convert from almost-Montgomery form z := (x / 2^576) mod p_521 |
| 181 | // Input x[9]; output z[9] | 233 | // Input x[9]; output z[9] |
| 182 | extern void bignum_deamont_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 234 | extern void bignum_deamont_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 235 | |||
| 236 | // Convert from almost-Montgomery form z := (x / 2^256) mod p_sm2 | ||
| 237 | // Input x[4]; output z[4] | ||
| 238 | extern void bignum_deamont_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 183 | 239 | ||
| 184 | // Convert from (almost-)Montgomery form z := (x / 2^{64k}) mod m | 240 | // Convert from (almost-)Montgomery form z := (x / 2^{64k}) mod m |
| 185 | // Inputs x[k], m[k]; output z[k] | 241 | // Inputs x[k], m[k]; output z[k] |
| 186 | extern void bignum_demont (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); | 242 | extern void bignum_demont (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m); |
| 187 | 243 | ||
| 188 | // Convert from Montgomery form z := (x / 2^256) mod p_256, assuming x reduced | 244 | // Convert from Montgomery form z := (x / 2^256) mod p_256, assuming x reduced |
| 189 | // Input x[4]; output z[4] | 245 | // Input x[4]; output z[4] |
| 190 | extern void bignum_demont_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 246 | extern void bignum_demont_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 191 | extern void bignum_demont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); | 247 | extern void bignum_demont_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 192 | 248 | ||
| 193 | // Convert from Montgomery form z := (x / 2^256) mod p_256k1, assuming x reduced | 249 | // Convert from Montgomery form z := (x / 2^256) mod p_256k1, assuming x reduced |
| 194 | // Input x[4]; output z[4] | 250 | // Input x[4]; output z[4] |
| 195 | extern void bignum_demont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 251 | extern void bignum_demont_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 196 | 252 | ||
| 197 | // Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced | 253 | // Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced |
| 198 | // Input x[6]; output z[6] | 254 | // Input x[6]; output z[6] |
| 199 | extern void bignum_demont_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 255 | extern void bignum_demont_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 200 | extern void bignum_demont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); | 256 | extern void bignum_demont_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 201 | 257 | ||
| 202 | // Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced | 258 | // Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced |
| 203 | // Input x[9]; output z[9] | 259 | // Input x[9]; output z[9] |
| 204 | extern void bignum_demont_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 260 | extern void bignum_demont_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 261 | |||
| 262 | // Convert from Montgomery form z := (x / 2^256) mod p_sm2, assuming x reduced | ||
| 263 | // Input x[4]; output z[4] | ||
| 264 | extern void bignum_demont_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 205 | 265 | ||
| 206 | // Select digit x[n] | 266 | // Select digit x[n] |
| 207 | // Inputs x[k], n; output function return | 267 | // Inputs x[k], n; output function return |
| 208 | extern uint64_t bignum_digit (uint64_t k, uint64_t *x, uint64_t n); | 268 | extern uint64_t bignum_digit (uint64_t k, const uint64_t *x, uint64_t n); |
| 209 | 269 | ||
| 210 | // Return size of bignum in digits (64-bit words) | 270 | // Return size of bignum in digits (64-bit words) |
| 211 | // Input x[k]; output function return | 271 | // Input x[k]; output function return |
| 212 | extern uint64_t bignum_digitsize (uint64_t k, uint64_t *x); | 272 | extern uint64_t bignum_digitsize (uint64_t k, const uint64_t *x); |
| 213 | 273 | ||
| 214 | // Divide bignum by 10: z' := z div 10, returning remainder z mod 10 | 274 | // Divide bignum by 10: z' := z div 10, returning remainder z mod 10 |
| 215 | // Inputs z[k]; outputs function return (remainder) and z[k] | 275 | // Inputs z[k]; outputs function return (remainder) and z[k] |
| @@ -217,294 +277,391 @@ extern uint64_t bignum_divmod10 (uint64_t k, uint64_t *z); | |||
| 217 | 277 | ||
| 218 | // Double modulo p_25519, z := (2 * x) mod p_25519, assuming x reduced | 278 | // Double modulo p_25519, z := (2 * x) mod p_25519, assuming x reduced |
| 219 | // Input x[4]; output z[4] | 279 | // Input x[4]; output z[4] |
| 220 | extern void bignum_double_p25519 (uint64_t z[static 4], uint64_t x[static 4]); | 280 | extern void bignum_double_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 221 | 281 | ||
| 222 | // Double modulo p_256, z := (2 * x) mod p_256, assuming x reduced | 282 | // Double modulo p_256, z := (2 * x) mod p_256, assuming x reduced |
| 223 | // Input x[4]; output z[4] | 283 | // Input x[4]; output z[4] |
| 224 | extern void bignum_double_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 284 | extern void bignum_double_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 225 | 285 | ||
| 226 | // Double modulo p_256k1, z := (2 * x) mod p_256k1, assuming x reduced | 286 | // Double modulo p_256k1, z := (2 * x) mod p_256k1, assuming x reduced |
| 227 | // Input x[4]; output z[4] | 287 | // Input x[4]; output z[4] |
| 228 | extern void bignum_double_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 288 | extern void bignum_double_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 229 | 289 | ||
| 230 | // Double modulo p_384, z := (2 * x) mod p_384, assuming x reduced | 290 | // Double modulo p_384, z := (2 * x) mod p_384, assuming x reduced |
| 231 | // Input x[6]; output z[6] | 291 | // Input x[6]; output z[6] |
| 232 | extern void bignum_double_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 292 | extern void bignum_double_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 233 | 293 | ||
| 234 | // Double modulo p_521, z := (2 * x) mod p_521, assuming x reduced | 294 | // Double modulo p_521, z := (2 * x) mod p_521, assuming x reduced |
| 235 | // Input x[9]; output z[9] | 295 | // Input x[9]; output z[9] |
| 236 | extern void bignum_double_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 296 | extern void bignum_double_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 297 | |||
| 298 | // Double modulo p_sm2, z := (2 * x) mod p_sm2, assuming x reduced | ||
| 299 | // Input x[4]; output z[4] | ||
| 300 | extern void bignum_double_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 237 | 301 | ||
| 238 | // Extended Montgomery reduce, returning results in input-output buffer | 302 | // Extended Montgomery reduce, returning results in input-output buffer |
| 239 | // Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] | 303 | // Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] |
| 240 | extern uint64_t bignum_emontredc (uint64_t k, uint64_t *z, uint64_t *m, uint64_t w); | 304 | extern uint64_t bignum_emontredc (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t w); |
| 241 | 305 | ||
| 242 | // Extended Montgomery reduce in 8-digit blocks, results in input-output buffer | 306 | // Extended Montgomery reduce in 8-digit blocks, results in input-output buffer |
| 243 | // Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] | 307 | // Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] |
| 244 | extern uint64_t bignum_emontredc_8n (uint64_t k, uint64_t *z, uint64_t *m, uint64_t w); | 308 | extern uint64_t bignum_emontredc_8n (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t w); |
| 309 | // Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k] | ||
| 310 | // Temporary buffer m_precalc[12*(k/4-1)] | ||
| 311 | extern uint64_t bignum_emontredc_8n_cdiff (uint64_t k, uint64_t *z, const uint64_t *m, | ||
| 312 | uint64_t w, uint64_t *m_precalc); | ||
| 245 | 313 | ||
| 246 | // Test bignums for equality, x = y | 314 | // Test bignums for equality, x = y |
| 247 | // Inputs x[m], y[n]; output function return | 315 | // Inputs x[m], y[n]; output function return |
| 248 | extern uint64_t bignum_eq (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 316 | extern uint64_t bignum_eq (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
| 249 | 317 | ||
| 250 | // Test bignum for even-ness | 318 | // Test bignum for even-ness |
| 251 | // Input x[k]; output function return | 319 | // Input x[k]; output function return |
| 252 | extern uint64_t bignum_even (uint64_t k, uint64_t *x); | 320 | extern uint64_t bignum_even (uint64_t k, const uint64_t *x); |
| 253 | 321 | ||
| 254 | // Convert 4-digit (256-bit) bignum from big-endian bytes | 322 | // Convert 4-digit (256-bit) bignum from big-endian bytes |
| 255 | // Input x[32] (bytes); output z[4] | 323 | // Input x[32] (bytes); output z[4] |
| 256 | extern void bignum_frombebytes_4 (uint64_t z[static 4], uint8_t x[static 32]); | 324 | extern void bignum_frombebytes_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint8_t x[S2N_BIGNUM_STATIC 32]); |
| 257 | 325 | ||
| 258 | // Convert 6-digit (384-bit) bignum from big-endian bytes | 326 | // Convert 6-digit (384-bit) bignum from big-endian bytes |
| 259 | // Input x[48] (bytes); output z[6] | 327 | // Input x[48] (bytes); output z[6] |
| 260 | extern void bignum_frombebytes_6 (uint64_t z[static 6], uint8_t x[static 48]); | 328 | extern void bignum_frombebytes_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint8_t x[S2N_BIGNUM_STATIC 48]); |
| 261 | 329 | ||
| 262 | // Convert 4-digit (256-bit) bignum from little-endian bytes | 330 | // Convert 4-digit (256-bit) bignum from little-endian bytes |
| 263 | // Input x[32] (bytes); output z[4] | 331 | // Input x[32] (bytes); output z[4] |
| 264 | extern void bignum_fromlebytes_4 (uint64_t z[static 4], uint8_t x[static 32]); | 332 | extern void bignum_fromlebytes_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint8_t x[S2N_BIGNUM_STATIC 32]); |
| 265 | 333 | ||
| 266 | // Convert 6-digit (384-bit) bignum from little-endian bytes | 334 | // Convert 6-digit (384-bit) bignum from little-endian bytes |
| 267 | // Input x[48] (bytes); output z[6] | 335 | // Input x[48] (bytes); output z[6] |
| 268 | extern void bignum_fromlebytes_6 (uint64_t z[static 6], uint8_t x[static 48]); | 336 | extern void bignum_fromlebytes_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint8_t x[S2N_BIGNUM_STATIC 48]); |
| 269 | 337 | ||
| 270 | // Convert little-endian bytes to 9-digit 528-bit bignum | 338 | // Convert little-endian bytes to 9-digit 528-bit bignum |
| 271 | // Input x[66] (bytes); output z[9] | 339 | // Input x[66] (bytes); output z[9] |
| 272 | extern void bignum_fromlebytes_p521 (uint64_t z[static 9],uint8_t x[static 66]); | 340 | extern void bignum_fromlebytes_p521 (uint64_t z[S2N_BIGNUM_STATIC 9],const uint8_t x[S2N_BIGNUM_STATIC 66]); |
| 273 | 341 | ||
| 274 | // Compare bignums, x >= y | 342 | // Compare bignums, x >= y |
| 275 | // Inputs x[m], y[n]; output function return | 343 | // Inputs x[m], y[n]; output function return |
| 276 | extern uint64_t bignum_ge (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 344 | extern uint64_t bignum_ge (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
| 277 | 345 | ||
| 278 | // Compare bignums, x > y | 346 | // Compare bignums, x > y |
| 279 | // Inputs x[m], y[n]; output function return | 347 | // Inputs x[m], y[n]; output function return |
| 280 | extern uint64_t bignum_gt (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 348 | extern uint64_t bignum_gt (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
| 281 | 349 | ||
| 282 | // Halve modulo p_256, z := (x / 2) mod p_256, assuming x reduced | 350 | // Halve modulo p_256, z := (x / 2) mod p_256, assuming x reduced |
| 283 | // Input x[4]; output z[4] | 351 | // Input x[4]; output z[4] |
| 284 | extern void bignum_half_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 352 | extern void bignum_half_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 285 | 353 | ||
| 286 | // Halve modulo p_256k1, z := (x / 2) mod p_256k1, assuming x reduced | 354 | // Halve modulo p_256k1, z := (x / 2) mod p_256k1, assuming x reduced |
| 287 | // Input x[4]; output z[4] | 355 | // Input x[4]; output z[4] |
| 288 | extern void bignum_half_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 356 | extern void bignum_half_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 289 | 357 | ||
| 290 | // Halve modulo p_384, z := (x / 2) mod p_384, assuming x reduced | 358 | // Halve modulo p_384, z := (x / 2) mod p_384, assuming x reduced |
| 291 | // Input x[6]; output z[6] | 359 | // Input x[6]; output z[6] |
| 292 | extern void bignum_half_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 360 | extern void bignum_half_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 293 | 361 | ||
| 294 | // Halve modulo p_521, z := (x / 2) mod p_521, assuming x reduced | 362 | // Halve modulo p_521, z := (x / 2) mod p_521, assuming x reduced |
| 295 | // Input x[9]; output z[9] | 363 | // Input x[9]; output z[9] |
| 296 | extern void bignum_half_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 364 | extern void bignum_half_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 365 | |||
| 366 | // Halve modulo p_sm2, z := (x / 2) mod p_sm2, assuming x reduced | ||
| 367 | // Input x[4]; output z[4] | ||
| 368 | extern void bignum_half_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 369 | |||
| 370 | // Modular inverse modulo p_25519 = 2^255 - 19 | ||
| 371 | // Input x[4]; output z[4] | ||
| 372 | extern void bignum_inv_p25519(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 373 | |||
| 374 | // Modular inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1 | ||
| 375 | // Input x[4]; output z[4] | ||
| 376 | extern void bignum_inv_p256(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 377 | |||
| 378 | // Modular inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1 | ||
| 379 | // Input x[6]; output z[6] | ||
| 380 | extern void bignum_inv_p384(uint64_t z[S2N_BIGNUM_STATIC 6],const uint64_t x[S2N_BIGNUM_STATIC 6]); | ||
| 381 | |||
| 382 | // Modular inverse modulo p_521 = 2^521 - 1 | ||
| 383 | // Input x[9]; output z[9] | ||
| 384 | extern void bignum_inv_p521(uint64_t z[S2N_BIGNUM_STATIC 9],const uint64_t x[S2N_BIGNUM_STATIC 9]); | ||
| 385 | |||
| 386 | // Modular inverse modulo p_sm2 = 2^256 - 2^224 - 2^96 + 2^64 - 1 | ||
| 387 | // Input x[4]; output z[4] | ||
| 388 | extern void bignum_inv_sm2(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 389 | |||
| 390 | // Inverse square root modulo p_25519 | ||
| 391 | // Input x[4]; output function return (Legendre symbol) and z[4] | ||
| 392 | extern int64_t bignum_invsqrt_p25519(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 393 | extern int64_t bignum_invsqrt_p25519_alt(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 297 | 394 | ||
| 298 | // Test bignum for zero-ness, x = 0 | 395 | // Test bignum for zero-ness, x = 0 |
| 299 | // Input x[k]; output function return | 396 | // Input x[k]; output function return |
| 300 | extern uint64_t bignum_iszero (uint64_t k, uint64_t *x); | 397 | extern uint64_t bignum_iszero (uint64_t k, const uint64_t *x); |
| 301 | 398 | ||
| 302 | // Multiply z := x * y | 399 | // Multiply z := x * y |
| 303 | // Inputs x[16], y[16]; output z[32]; temporary buffer t[>=32] | 400 | // Inputs x[16], y[16]; output z[32]; temporary buffer t[>=32] |
| 304 | extern void bignum_kmul_16_32 (uint64_t z[static 32], uint64_t x[static 16], uint64_t y[static 16], uint64_t t[static 32]); | 401 | extern void bignum_kmul_16_32 (uint64_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 16], const uint64_t y[S2N_BIGNUM_STATIC 16], uint64_t t[S2N_BIGNUM_STATIC 32]); |
| 305 | 402 | ||
| 306 | // Multiply z := x * y | 403 | // Multiply z := x * y |
| 307 | // Inputs x[32], y[32]; output z[64]; temporary buffer t[>=96] | 404 | // Inputs x[32], y[32]; output z[64]; temporary buffer t[>=96] |
| 308 | extern void bignum_kmul_32_64 (uint64_t z[static 64], uint64_t x[static 32], uint64_t y[static 32], uint64_t t[static 96]); | 405 | extern void bignum_kmul_32_64 (uint64_t z[S2N_BIGNUM_STATIC 64], const uint64_t x[S2N_BIGNUM_STATIC 32], const uint64_t y[S2N_BIGNUM_STATIC 32], uint64_t t[S2N_BIGNUM_STATIC 96]); |
| 309 | 406 | ||
| 310 | // Square, z := x^2 | 407 | // Square, z := x^2 |
| 311 | // Input x[16]; output z[32]; temporary buffer t[>=24] | 408 | // Input x[16]; output z[32]; temporary buffer t[>=24] |
| 312 | extern void bignum_ksqr_16_32 (uint64_t z[static 32], uint64_t x[static 16], uint64_t t[static 24]); | 409 | extern void bignum_ksqr_16_32 (uint64_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 16], uint64_t t[S2N_BIGNUM_STATIC 24]); |
| 313 | 410 | ||
| 314 | // Square, z := x^2 | 411 | // Square, z := x^2 |
| 315 | // Input x[32]; output z[64]; temporary buffer t[>=72] | 412 | // Input x[32]; output z[64]; temporary buffer t[>=72] |
| 316 | extern void bignum_ksqr_32_64 (uint64_t z[static 64], uint64_t x[static 32], uint64_t t[static 72]); | 413 | extern void bignum_ksqr_32_64 (uint64_t z[S2N_BIGNUM_STATIC 64], const uint64_t x[S2N_BIGNUM_STATIC 32], uint64_t t[S2N_BIGNUM_STATIC 72]); |
| 317 | 414 | ||
| 318 | // Compare bignums, x <= y | 415 | // Compare bignums, x <= y |
| 319 | // Inputs x[m], y[n]; output function return | 416 | // Inputs x[m], y[n]; output function return |
| 320 | extern uint64_t bignum_le (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 417 | extern uint64_t bignum_le (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
| 321 | 418 | ||
| 322 | // Convert 4-digit (256-bit) bignum to/from little-endian form | 419 | // Convert 4-digit (256-bit) bignum to/from little-endian form |
| 323 | // Input x[4]; output z[4] | 420 | // Input x[4]; output z[4] |
| 324 | extern void bignum_littleendian_4 (uint64_t z[static 4], uint64_t x[static 4]); | 421 | extern void bignum_littleendian_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 325 | 422 | ||
| 326 | // Convert 6-digit (384-bit) bignum to/from little-endian form | 423 | // Convert 6-digit (384-bit) bignum to/from little-endian form |
| 327 | // Input x[6]; output z[6] | 424 | // Input x[6]; output z[6] |
| 328 | extern void bignum_littleendian_6 (uint64_t z[static 6], uint64_t x[static 6]); | 425 | extern void bignum_littleendian_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 329 | 426 | ||
| 330 | // Compare bignums, x < y | 427 | // Compare bignums, x < y |
| 331 | // Inputs x[m], y[n]; output function return | 428 | // Inputs x[m], y[n]; output function return |
| 332 | extern uint64_t bignum_lt (uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 429 | extern uint64_t bignum_lt (uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
| 333 | 430 | ||
| 334 | // Multiply-add, z := z + x * y | 431 | // Multiply-add, z := z + x * y |
| 335 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[k] | 432 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[k] |
| 336 | extern uint64_t bignum_madd (uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 433 | extern uint64_t bignum_madd (uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
| 434 | |||
| 435 | // Multiply-add modulo the order of the curve25519/edwards25519 basepoint | ||
| 436 | // Inputs x[4], y[4], c[4]; output z[4] | ||
| 437 | extern void bignum_madd_n25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4], const uint64_t c[S2N_BIGNUM_STATIC 4]); | ||
| 438 | extern void bignum_madd_n25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4], const uint64_t c[S2N_BIGNUM_STATIC 4]); | ||
| 439 | |||
| 440 | // Reduce modulo group order, z := x mod m_25519 | ||
| 441 | // Input x[4]; output z[4] | ||
| 442 | extern void bignum_mod_m25519_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 443 | |||
| 444 | // Reduce modulo basepoint order, z := x mod n_25519 | ||
| 445 | // Input x[k]; output z[4] | ||
| 446 | extern void bignum_mod_n25519 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); | ||
| 447 | |||
| 448 | // Reduce modulo basepoint order, z := x mod n_25519 | ||
| 449 | // Input x[4]; output z[4] | ||
| 450 | extern void bignum_mod_n25519_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 337 | 451 | ||
| 338 | // Reduce modulo group order, z := x mod n_256 | 452 | // Reduce modulo group order, z := x mod n_256 |
| 339 | // Input x[k]; output z[4] | 453 | // Input x[k]; output z[4] |
| 340 | extern void bignum_mod_n256 (uint64_t z[static 4], uint64_t k, uint64_t *x); | 454 | extern void bignum_mod_n256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); |
| 341 | extern void bignum_mod_n256_alt (uint64_t z[static 4], uint64_t k, uint64_t *x); | 455 | extern void bignum_mod_n256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); |
| 342 | 456 | ||
| 343 | // Reduce modulo group order, z := x mod n_256 | 457 | // Reduce modulo group order, z := x mod n_256 |
| 344 | // Input x[4]; output z[4] | 458 | // Input x[4]; output z[4] |
| 345 | extern void bignum_mod_n256_4 (uint64_t z[static 4], uint64_t x[static 4]); | 459 | extern void bignum_mod_n256_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 346 | 460 | ||
| 347 | // Reduce modulo group order, z := x mod n_256k1 | 461 | // Reduce modulo group order, z := x mod n_256k1 |
| 348 | // Input x[4]; output z[4] | 462 | // Input x[4]; output z[4] |
| 349 | extern void bignum_mod_n256k1_4 (uint64_t z[static 4], uint64_t x[static 4]); | 463 | extern void bignum_mod_n256k1_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 350 | 464 | ||
| 351 | // Reduce modulo group order, z := x mod n_384 | 465 | // Reduce modulo group order, z := x mod n_384 |
| 352 | // Input x[k]; output z[6] | 466 | // Input x[k]; output z[6] |
| 353 | extern void bignum_mod_n384 (uint64_t z[static 6], uint64_t k, uint64_t *x); | 467 | extern void bignum_mod_n384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x); |
| 354 | extern void bignum_mod_n384_alt (uint64_t z[static 6], uint64_t k, uint64_t *x); | 468 | extern void bignum_mod_n384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x); |
| 355 | 469 | ||
| 356 | // Reduce modulo group order, z := x mod n_384 | 470 | // Reduce modulo group order, z := x mod n_384 |
| 357 | // Input x[6]; output z[6] | 471 | // Input x[6]; output z[6] |
| 358 | extern void bignum_mod_n384_6 (uint64_t z[static 6], uint64_t x[static 6]); | 472 | extern void bignum_mod_n384_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 359 | 473 | ||
| 360 | // Reduce modulo group order, z := x mod n_521 | 474 | // Reduce modulo group order, z := x mod n_521 |
| 361 | // Input x[9]; output z[9] | 475 | // Input x[9]; output z[9] |
| 362 | extern void bignum_mod_n521_9 (uint64_t z[static 9], uint64_t x[static 9]); | 476 | extern void bignum_mod_n521_9 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 363 | extern void bignum_mod_n521_9_alt (uint64_t z[static 9], uint64_t x[static 9]); | 477 | extern void bignum_mod_n521_9_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 478 | |||
| 479 | // Reduce modulo group order, z := x mod n_sm2 | ||
| 480 | // Input x[k]; output z[4] | ||
| 481 | extern void bignum_mod_nsm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); | ||
| 482 | extern void bignum_mod_nsm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); | ||
| 483 | |||
| 484 | // Reduce modulo group order, z := x mod n_sm2 | ||
| 485 | // Input x[4]; output z[4] | ||
| 486 | extern void bignum_mod_nsm2_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 364 | 487 | ||
| 365 | // Reduce modulo field characteristic, z := x mod p_25519 | 488 | // Reduce modulo field characteristic, z := x mod p_25519 |
| 366 | // Input x[4]; output z[4] | 489 | // Input x[4]; output z[4] |
| 367 | extern void bignum_mod_p25519_4 (uint64_t z[static 4], uint64_t x[static 4]); | 490 | extern void bignum_mod_p25519_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 368 | 491 | ||
| 369 | // Reduce modulo field characteristic, z := x mod p_256 | 492 | // Reduce modulo field characteristic, z := x mod p_256 |
| 370 | // Input x[k]; output z[4] | 493 | // Input x[k]; output z[4] |
| 371 | extern void bignum_mod_p256 (uint64_t z[static 4], uint64_t k, uint64_t *x); | 494 | extern void bignum_mod_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); |
| 372 | extern void bignum_mod_p256_alt (uint64_t z[static 4], uint64_t k, uint64_t *x); | 495 | extern void bignum_mod_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); |
| 373 | 496 | ||
| 374 | // Reduce modulo field characteristic, z := x mod p_256 | 497 | // Reduce modulo field characteristic, z := x mod p_256 |
| 375 | // Input x[4]; output z[4] | 498 | // Input x[4]; output z[4] |
| 376 | extern void bignum_mod_p256_4 (uint64_t z[static 4], uint64_t x[static 4]); | 499 | extern void bignum_mod_p256_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 377 | 500 | ||
| 378 | // Reduce modulo field characteristic, z := x mod p_256k1 | 501 | // Reduce modulo field characteristic, z := x mod p_256k1 |
| 379 | // Input x[4]; output z[4] | 502 | // Input x[4]; output z[4] |
| 380 | extern void bignum_mod_p256k1_4 (uint64_t z[static 4], uint64_t x[static 4]); | 503 | extern void bignum_mod_p256k1_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 381 | 504 | ||
| 382 | // Reduce modulo field characteristic, z := x mod p_384 | 505 | // Reduce modulo field characteristic, z := x mod p_384 |
| 383 | // Input x[k]; output z[6] | 506 | // Input x[k]; output z[6] |
| 384 | extern void bignum_mod_p384 (uint64_t z[static 6], uint64_t k, uint64_t *x); | 507 | extern void bignum_mod_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x); |
| 385 | extern void bignum_mod_p384_alt (uint64_t z[static 6], uint64_t k, uint64_t *x); | 508 | extern void bignum_mod_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t k, const uint64_t *x); |
| 386 | 509 | ||
| 387 | // Reduce modulo field characteristic, z := x mod p_384 | 510 | // Reduce modulo field characteristic, z := x mod p_384 |
| 388 | // Input x[6]; output z[6] | 511 | // Input x[6]; output z[6] |
| 389 | extern void bignum_mod_p384_6 (uint64_t z[static 6], uint64_t x[static 6]); | 512 | extern void bignum_mod_p384_6 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 390 | 513 | ||
| 391 | // Reduce modulo field characteristic, z := x mod p_521 | 514 | // Reduce modulo field characteristic, z := x mod p_521 |
| 392 | // Input x[9]; output z[9] | 515 | // Input x[9]; output z[9] |
| 393 | extern void bignum_mod_p521_9 (uint64_t z[static 9], uint64_t x[static 9]); | 516 | extern void bignum_mod_p521_9 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 517 | |||
| 518 | // Reduce modulo field characteristic, z := x mod p_sm2 | ||
| 519 | // Input x[k]; output z[4] | ||
| 520 | extern void bignum_mod_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); | ||
| 521 | |||
| 522 | // Reduce modulo field characteristic, z := x mod p_sm2 | ||
| 523 | // Input x[4]; output z[4] | ||
| 524 | extern void bignum_mod_sm2_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 394 | 525 | ||
| 395 | // Add modulo m, z := (x + y) mod m, assuming x and y reduced | 526 | // Add modulo m, z := (x + y) mod m, assuming x and y reduced |
| 396 | // Inputs x[k], y[k], m[k]; output z[k] | 527 | // Inputs x[k], y[k], m[k]; output z[k] |
| 397 | extern void bignum_modadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); | 528 | extern void bignum_modadd (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m); |
| 398 | 529 | ||
| 399 | // Double modulo m, z := (2 * x) mod m, assuming x reduced | 530 | // Double modulo m, z := (2 * x) mod m, assuming x reduced |
| 400 | // Inputs x[k], m[k]; output z[k] | 531 | // Inputs x[k], m[k]; output z[k] |
| 401 | extern void bignum_moddouble (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); | 532 | extern void bignum_moddouble (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m); |
| 533 | |||
| 534 | // Modular exponentiation for arbitrary odd modulus, z := (a^p) mod m | ||
| 535 | // Inputs a[k], p[k], m[k]; output z[k], temporary buffer t[>=3*k] | ||
| 536 | extern void bignum_modexp(uint64_t k,uint64_t *z, const uint64_t *a,const uint64_t *p,const uint64_t *m,uint64_t *t); | ||
| 402 | 537 | ||
| 403 | // Compute "modification" constant z := 2^{64k} mod m | 538 | // Compute "modification" constant z := 2^{64k} mod m |
| 404 | // Input m[k]; output z[k]; temporary buffer t[>=k] | 539 | // Input m[k]; output z[k]; temporary buffer t[>=k] |
| 405 | extern void bignum_modifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t); | 540 | extern void bignum_modifier (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t *t); |
| 406 | 541 | ||
| 407 | // Invert modulo b, z := (1/a) mod b, assuming b is an odd number > 1, a coprime to b | 542 | // Invert modulo b, z := (1/a) mod b, assuming b is an odd number > 1, a coprime to b |
| 408 | // Inputs a[k], b[k]; output z[k]; temporary buffer t[>=3*k] | 543 | // Inputs a[k], b[k]; output z[k]; temporary buffer t[>=3*k] |
| 409 | extern void bignum_modinv (uint64_t k, uint64_t *z, uint64_t *a, uint64_t *b, uint64_t *t); | 544 | extern void bignum_modinv (uint64_t k, uint64_t *z, const uint64_t *a, const uint64_t *b, uint64_t *t); |
| 410 | 545 | ||
| 411 | // Optionally negate modulo m, z := (-x) mod m (if p nonzero) or z := x (if p zero), assuming x reduced | 546 | // Optionally negate modulo m, z := (-x) mod m (if p nonzero) or z := x (if p zero), assuming x reduced |
| 412 | // Inputs p, x[k], m[k]; output z[k] | 547 | // Inputs p, x[k], m[k]; output z[k] |
| 413 | extern void bignum_modoptneg (uint64_t k, uint64_t *z, uint64_t p, uint64_t *x, uint64_t *m); | 548 | extern void bignum_modoptneg (uint64_t k, uint64_t *z, uint64_t p, const uint64_t *x, const uint64_t *m); |
| 414 | 549 | ||
| 415 | // Subtract modulo m, z := (x - y) mod m, assuming x and y reduced | 550 | // Subtract modulo m, z := (x - y) mod m, assuming x and y reduced |
| 416 | // Inputs x[k], y[k], m[k]; output z[k] | 551 | // Inputs x[k], y[k], m[k]; output z[k] |
| 417 | extern void bignum_modsub (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); | 552 | extern void bignum_modsub (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m); |
| 418 | 553 | ||
| 419 | // Compute "montification" constant z := 2^{128k} mod m | 554 | // Compute "montification" constant z := 2^{128k} mod m |
| 420 | // Input m[k]; output z[k]; temporary buffer t[>=k] | 555 | // Input m[k]; output z[k]; temporary buffer t[>=k] |
| 421 | extern void bignum_montifier (uint64_t k, uint64_t *z, uint64_t *m, uint64_t *t); | 556 | extern void bignum_montifier (uint64_t k, uint64_t *z, const uint64_t *m, uint64_t *t); |
| 557 | |||
| 558 | // Montgomery inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1 | ||
| 559 | // Input x[4]; output z[4] | ||
| 560 | extern void bignum_montinv_p256(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 561 | |||
| 562 | // Montgomery inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1 | ||
| 563 | // Input x[6]; output z[6] | ||
| 564 | extern void bignum_montinv_p384(uint64_t z[S2N_BIGNUM_STATIC 6],const uint64_t x[S2N_BIGNUM_STATIC 6]); | ||
| 565 | |||
| 566 | // Montgomery inverse modulo p_sm2 = 2^256 - 2^224 - 2^96 + 2^64 - 1 | ||
| 567 | // Input x[4]; output z[4] | ||
| 568 | extern void bignum_montinv_sm2(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 422 | 569 | ||
| 423 | // Montgomery multiply, z := (x * y / 2^{64k}) mod m | 570 | // Montgomery multiply, z := (x * y / 2^{64k}) mod m |
| 424 | // Inputs x[k], y[k], m[k]; output z[k] | 571 | // Inputs x[k], y[k], m[k]; output z[k] |
| 425 | extern void bignum_montmul (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, uint64_t *m); | 572 | extern void bignum_montmul (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y, const uint64_t *m); |
| 426 | 573 | ||
| 427 | // Montgomery multiply, z := (x * y / 2^256) mod p_256 | 574 | // Montgomery multiply, z := (x * y / 2^256) mod p_256 |
| 428 | // Inputs x[4], y[4]; output z[4] | 575 | // Inputs x[4], y[4]; output z[4] |
| 429 | extern void bignum_montmul_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 576 | extern void bignum_montmul_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 430 | extern void bignum_montmul_p256_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 577 | extern void bignum_montmul_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 431 | 578 | ||
| 432 | // Montgomery multiply, z := (x * y / 2^256) mod p_256k1 | 579 | // Montgomery multiply, z := (x * y / 2^256) mod p_256k1 |
| 433 | // Inputs x[4], y[4]; output z[4] | 580 | // Inputs x[4], y[4]; output z[4] |
| 434 | extern void bignum_montmul_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 581 | extern void bignum_montmul_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 435 | extern void bignum_montmul_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 582 | extern void bignum_montmul_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 436 | 583 | ||
| 437 | // Montgomery multiply, z := (x * y / 2^384) mod p_384 | 584 | // Montgomery multiply, z := (x * y / 2^384) mod p_384 |
| 438 | // Inputs x[6], y[6]; output z[6] | 585 | // Inputs x[6], y[6]; output z[6] |
| 439 | extern void bignum_montmul_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); | 586 | extern void bignum_montmul_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
| 440 | extern void bignum_montmul_p384_alt (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); | 587 | extern void bignum_montmul_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
| 441 | 588 | ||
| 442 | // Montgomery multiply, z := (x * y / 2^576) mod p_521 | 589 | // Montgomery multiply, z := (x * y / 2^576) mod p_521 |
| 443 | // Inputs x[9], y[9]; output z[9] | 590 | // Inputs x[9], y[9]; output z[9] |
| 444 | extern void bignum_montmul_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 591 | extern void bignum_montmul_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
| 445 | extern void bignum_montmul_p521_alt (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 592 | extern void bignum_montmul_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
| 593 | |||
| 594 | // Montgomery multiply, z := (x * y / 2^256) mod p_sm2 | ||
| 595 | // Inputs x[4], y[4]; output z[4] | ||
| 596 | extern void bignum_montmul_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); | ||
| 597 | extern void bignum_montmul_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); | ||
| 446 | 598 | ||
| 447 | // Montgomery reduce, z := (x' / 2^{64p}) MOD m | 599 | // Montgomery reduce, z := (x' / 2^{64p}) MOD m |
| 448 | // Inputs x[n], m[k], p; output z[k] | 600 | // Inputs x[n], m[k], p; output z[k] |
| 449 | extern void bignum_montredc (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t *m, uint64_t p); | 601 | extern void bignum_montredc (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, const uint64_t *m, uint64_t p); |
| 450 | 602 | ||
| 451 | // Montgomery square, z := (x^2 / 2^{64k}) mod m | 603 | // Montgomery square, z := (x^2 / 2^{64k}) mod m |
| 452 | // Inputs x[k], m[k]; output z[k] | 604 | // Inputs x[k], m[k]; output z[k] |
| 453 | extern void bignum_montsqr (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); | 605 | extern void bignum_montsqr (uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *m); |
| 454 | 606 | ||
| 455 | // Montgomery square, z := (x^2 / 2^256) mod p_256 | 607 | // Montgomery square, z := (x^2 / 2^256) mod p_256 |
| 456 | // Input x[4]; output z[4] | 608 | // Input x[4]; output z[4] |
| 457 | extern void bignum_montsqr_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 609 | extern void bignum_montsqr_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 458 | extern void bignum_montsqr_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); | 610 | extern void bignum_montsqr_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 459 | 611 | ||
| 460 | // Montgomery square, z := (x^2 / 2^256) mod p_256k1 | 612 | // Montgomery square, z := (x^2 / 2^256) mod p_256k1 |
| 461 | // Input x[4]; output z[4] | 613 | // Input x[4]; output z[4] |
| 462 | extern void bignum_montsqr_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 614 | extern void bignum_montsqr_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 463 | extern void bignum_montsqr_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); | 615 | extern void bignum_montsqr_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 464 | 616 | ||
| 465 | // Montgomery square, z := (x^2 / 2^384) mod p_384 | 617 | // Montgomery square, z := (x^2 / 2^384) mod p_384 |
| 466 | // Input x[6]; output z[6] | 618 | // Input x[6]; output z[6] |
| 467 | extern void bignum_montsqr_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 619 | extern void bignum_montsqr_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 468 | extern void bignum_montsqr_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); | 620 | extern void bignum_montsqr_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 469 | 621 | ||
| 470 | // Montgomery square, z := (x^2 / 2^576) mod p_521 | 622 | // Montgomery square, z := (x^2 / 2^576) mod p_521 |
| 471 | // Input x[9]; output z[9] | 623 | // Input x[9]; output z[9] |
| 472 | extern void bignum_montsqr_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 624 | extern void bignum_montsqr_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 473 | extern void bignum_montsqr_p521_alt (uint64_t z[static 9], uint64_t x[static 9]); | 625 | extern void bignum_montsqr_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 626 | |||
| 627 | // Montgomery square, z := (x^2 / 2^256) mod p_sm2 | ||
| 628 | // Input x[4]; output z[4] | ||
| 629 | extern void bignum_montsqr_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 630 | extern void bignum_montsqr_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 474 | 631 | ||
| 475 | // Multiply z := x * y | 632 | // Multiply z := x * y |
| 476 | // Inputs x[m], y[n]; output z[k] | 633 | // Inputs x[m], y[n]; output z[k] |
| 477 | extern void bignum_mul (uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 634 | extern void bignum_mul (uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
| 478 | 635 | ||
| 479 | // Multiply z := x * y | 636 | // Multiply z := x * y |
| 480 | // Inputs x[4], y[4]; output z[8] | 637 | // Inputs x[4], y[4]; output z[8] |
| 481 | extern void bignum_mul_4_8 (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); | 638 | extern void bignum_mul_4_8 (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 482 | extern void bignum_mul_4_8_alt (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); | 639 | extern void bignum_mul_4_8_alt (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 483 | 640 | ||
| 484 | // Multiply z := x * y | 641 | // Multiply z := x * y |
| 485 | // Inputs x[6], y[6]; output z[12] | 642 | // Inputs x[6], y[6]; output z[12] |
| 486 | extern void bignum_mul_6_12 (uint64_t z[static 12], uint64_t x[static 6], uint64_t y[static 6]); | 643 | extern void bignum_mul_6_12 (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
| 487 | extern void bignum_mul_6_12_alt (uint64_t z[static 12], uint64_t x[static 6], uint64_t y[static 6]); | 644 | extern void bignum_mul_6_12_alt (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
| 488 | 645 | ||
| 489 | // Multiply z := x * y | 646 | // Multiply z := x * y |
| 490 | // Inputs x[8], y[8]; output z[16] | 647 | // Inputs x[8], y[8]; output z[16] |
| 491 | extern void bignum_mul_8_16 (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); | 648 | extern void bignum_mul_8_16 (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8], const uint64_t y[S2N_BIGNUM_STATIC 8]); |
| 492 | extern void bignum_mul_8_16_alt (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); | 649 | extern void bignum_mul_8_16_alt (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8], const uint64_t y[S2N_BIGNUM_STATIC 8]); |
| 493 | 650 | ||
| 494 | // Multiply modulo p_25519, z := (x * y) mod p_25519 | 651 | // Multiply modulo p_25519, z := (x * y) mod p_25519 |
| 495 | // Inputs x[4], y[4]; output z[4] | 652 | // Inputs x[4], y[4]; output z[4] |
| 496 | extern void bignum_mul_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 653 | extern void bignum_mul_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 497 | extern void bignum_mul_p25519_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 654 | extern void bignum_mul_p25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 498 | 655 | ||
| 499 | // Multiply modulo p_256k1, z := (x * y) mod p_256k1 | 656 | // Multiply modulo p_256k1, z := (x * y) mod p_256k1 |
| 500 | // Inputs x[4], y[4]; output z[4] | 657 | // Inputs x[4], y[4]; output z[4] |
| 501 | extern void bignum_mul_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 658 | extern void bignum_mul_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 502 | extern void bignum_mul_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 659 | extern void bignum_mul_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 503 | 660 | ||
| 504 | // Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced | 661 | // Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced |
| 505 | // Inputs x[9], y[9]; output z[9] | 662 | // Inputs x[9], y[9]; output z[9] |
| 506 | extern void bignum_mul_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 663 | extern void bignum_mul_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
| 507 | extern void bignum_mul_p521_alt (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 664 | extern void bignum_mul_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
| 508 | 665 | ||
| 509 | // Multiply bignum by 10 and add word: z := 10 * z + d | 666 | // Multiply bignum by 10 and add word: z := 10 * z + d |
| 510 | // Inputs z[k], d; outputs function return (carry) and z[k] | 667 | // Inputs z[k], d; outputs function return (carry) and z[k] |
| @@ -512,55 +669,59 @@ extern uint64_t bignum_muladd10 (uint64_t k, uint64_t *z, uint64_t d); | |||
| 512 | 669 | ||
| 513 | // Multiplex/select z := x (if p nonzero) or z := y (if p zero) | 670 | // Multiplex/select z := x (if p nonzero) or z := y (if p zero) |
| 514 | // Inputs p, x[k], y[k]; output z[k] | 671 | // Inputs p, x[k], y[k]; output z[k] |
| 515 | extern void bignum_mux (uint64_t p, uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y); | 672 | extern void bignum_mux (uint64_t p, uint64_t k, uint64_t *z, const uint64_t *x, const uint64_t *y); |
| 516 | 673 | ||
| 517 | // 256-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) | 674 | // 256-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) |
| 518 | // Inputs p, x[4], y[4]; output z[4] | 675 | // Inputs p, x[4], y[4]; output z[4] |
| 519 | extern void bignum_mux_4 (uint64_t p, uint64_t z[static 4],uint64_t x[static 4], uint64_t y[static 4]); | 676 | extern void bignum_mux_4 (uint64_t p, uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 520 | 677 | ||
| 521 | // 384-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) | 678 | // 384-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) |
| 522 | // Inputs p, x[6], y[6]; output z[6] | 679 | // Inputs p, x[6], y[6]; output z[6] |
| 523 | extern void bignum_mux_6 (uint64_t p, uint64_t z[static 6],uint64_t x[static 6], uint64_t y[static 6]); | 680 | extern void bignum_mux_6 (uint64_t p, uint64_t z[S2N_BIGNUM_STATIC 6],const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
| 524 | 681 | ||
| 525 | // Select element from 16-element table, z := xs[k*i] | 682 | // Select element from 16-element table, z := xs[k*i] |
| 526 | // Inputs xs[16*k], i; output z[k] | 683 | // Inputs xs[16*k], i; output z[k] |
| 527 | extern void bignum_mux16 (uint64_t k, uint64_t *z, uint64_t *xs, uint64_t i); | 684 | extern void bignum_mux16 (uint64_t k, uint64_t *z, const uint64_t *xs, uint64_t i); |
| 528 | 685 | ||
| 529 | // Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced | 686 | // Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced |
| 530 | // Input x[4]; output z[4] | 687 | // Input x[4]; output z[4] |
| 531 | extern void bignum_neg_p25519 (uint64_t z[static 4], uint64_t x[static 4]); | 688 | extern void bignum_neg_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 532 | 689 | ||
| 533 | // Negate modulo p_256, z := (-x) mod p_256, assuming x reduced | 690 | // Negate modulo p_256, z := (-x) mod p_256, assuming x reduced |
| 534 | // Input x[4]; output z[4] | 691 | // Input x[4]; output z[4] |
| 535 | extern void bignum_neg_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 692 | extern void bignum_neg_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 536 | 693 | ||
| 537 | // Negate modulo p_256k1, z := (-x) mod p_256k1, assuming x reduced | 694 | // Negate modulo p_256k1, z := (-x) mod p_256k1, assuming x reduced |
| 538 | // Input x[4]; output z[4] | 695 | // Input x[4]; output z[4] |
| 539 | extern void bignum_neg_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 696 | extern void bignum_neg_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 540 | 697 | ||
| 541 | // Negate modulo p_384, z := (-x) mod p_384, assuming x reduced | 698 | // Negate modulo p_384, z := (-x) mod p_384, assuming x reduced |
| 542 | // Input x[6]; output z[6] | 699 | // Input x[6]; output z[6] |
| 543 | extern void bignum_neg_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 700 | extern void bignum_neg_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 544 | 701 | ||
| 545 | // Negate modulo p_521, z := (-x) mod p_521, assuming x reduced | 702 | // Negate modulo p_521, z := (-x) mod p_521, assuming x reduced |
| 546 | // Input x[9]; output z[9] | 703 | // Input x[9]; output z[9] |
| 547 | extern void bignum_neg_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 704 | extern void bignum_neg_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 705 | |||
| 706 | // Negate modulo p_sm2, z := (-x) mod p_sm2, assuming x reduced | ||
| 707 | // Input x[4]; output z[4] | ||
| 708 | extern void bignum_neg_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 548 | 709 | ||
| 549 | // Negated modular inverse, z := (-1/x) mod 2^{64k} | 710 | // Negated modular inverse, z := (-1/x) mod 2^{64k} |
| 550 | // Input x[k]; output z[k] | 711 | // Input x[k]; output z[k] |
| 551 | extern void bignum_negmodinv (uint64_t k, uint64_t *z, uint64_t *x); | 712 | extern void bignum_negmodinv (uint64_t k, uint64_t *z, const uint64_t *x); |
| 552 | 713 | ||
| 553 | // Test bignum for nonzero-ness x =/= 0 | 714 | // Test bignum for nonzero-ness x =/= 0 |
| 554 | // Input x[k]; output function return | 715 | // Input x[k]; output function return |
| 555 | extern uint64_t bignum_nonzero (uint64_t k, uint64_t *x); | 716 | extern uint64_t bignum_nonzero (uint64_t k, const uint64_t *x); |
| 556 | 717 | ||
| 557 | // Test 256-bit bignum for nonzero-ness x =/= 0 | 718 | // Test 256-bit bignum for nonzero-ness x =/= 0 |
| 558 | // Input x[4]; output function return | 719 | // Input x[4]; output function return |
| 559 | extern uint64_t bignum_nonzero_4(uint64_t x[static 4]); | 720 | extern uint64_t bignum_nonzero_4(const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 560 | 721 | ||
| 561 | // Test 384-bit bignum for nonzero-ness x =/= 0 | 722 | // Test 384-bit bignum for nonzero-ness x =/= 0 |
| 562 | // Input x[6]; output function return | 723 | // Input x[6]; output function return |
| 563 | extern uint64_t bignum_nonzero_6(uint64_t x[static 6]); | 724 | extern uint64_t bignum_nonzero_6(const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 564 | 725 | ||
| 565 | // Normalize bignum in-place by shifting left till top bit is 1 | 726 | // Normalize bignum in-place by shifting left till top bit is 1 |
| 566 | // Input z[k]; outputs function return (bits shifted left) and z[k] | 727 | // Input z[k]; outputs function return (bits shifted left) and z[k] |
| @@ -568,7 +729,7 @@ extern uint64_t bignum_normalize (uint64_t k, uint64_t *z); | |||
| 568 | 729 | ||
| 569 | // Test bignum for odd-ness | 730 | // Test bignum for odd-ness |
| 570 | // Input x[k]; output function return | 731 | // Input x[k]; output function return |
| 571 | extern uint64_t bignum_odd (uint64_t k, uint64_t *x); | 732 | extern uint64_t bignum_odd (uint64_t k, const uint64_t *x); |
| 572 | 733 | ||
| 573 | // Convert single digit to bignum, z := n | 734 | // Convert single digit to bignum, z := n |
| 574 | // Input n; output z[k] | 735 | // Input n; output z[k] |
| @@ -576,39 +737,43 @@ extern void bignum_of_word (uint64_t k, uint64_t *z, uint64_t n); | |||
| 576 | 737 | ||
| 577 | // Optionally add, z := x + y (if p nonzero) or z := x (if p zero) | 738 | // Optionally add, z := x + y (if p nonzero) or z := x (if p zero) |
| 578 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] | 739 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] |
| 579 | extern uint64_t bignum_optadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y); | 740 | extern uint64_t bignum_optadd (uint64_t k, uint64_t *z, const uint64_t *x, uint64_t p, const uint64_t *y); |
| 580 | 741 | ||
| 581 | // Optionally negate, z := -x (if p nonzero) or z := x (if p zero) | 742 | // Optionally negate, z := -x (if p nonzero) or z := x (if p zero) |
| 582 | // Inputs p, x[k]; outputs function return (nonzero input) and z[k] | 743 | // Inputs p, x[k]; outputs function return (nonzero input) and z[k] |
| 583 | extern uint64_t bignum_optneg (uint64_t k, uint64_t *z, uint64_t p, uint64_t *x); | 744 | extern uint64_t bignum_optneg (uint64_t k, uint64_t *z, uint64_t p, const uint64_t *x); |
| 584 | 745 | ||
| 585 | // Optionally negate modulo p_25519, z := (-x) mod p_25519 (if p nonzero) or z := x (if p zero), assuming x reduced | 746 | // Optionally negate modulo p_25519, z := (-x) mod p_25519 (if p nonzero) or z := x (if p zero), assuming x reduced |
| 586 | // Inputs p, x[4]; output z[4] | 747 | // Inputs p, x[4]; output z[4] |
| 587 | extern void bignum_optneg_p25519 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]); | 748 | extern void bignum_optneg_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 588 | 749 | ||
| 589 | // Optionally negate modulo p_256, z := (-x) mod p_256 (if p nonzero) or z := x (if p zero), assuming x reduced | 750 | // Optionally negate modulo p_256, z := (-x) mod p_256 (if p nonzero) or z := x (if p zero), assuming x reduced |
| 590 | // Inputs p, x[4]; output z[4] | 751 | // Inputs p, x[4]; output z[4] |
| 591 | extern void bignum_optneg_p256 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]); | 752 | extern void bignum_optneg_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 592 | 753 | ||
| 593 | // Optionally negate modulo p_256k1, z := (-x) mod p_256k1 (if p nonzero) or z := x (if p zero), assuming x reduced | 754 | // Optionally negate modulo p_256k1, z := (-x) mod p_256k1 (if p nonzero) or z := x (if p zero), assuming x reduced |
| 594 | // Inputs p, x[4]; output z[4] | 755 | // Inputs p, x[4]; output z[4] |
| 595 | extern void bignum_optneg_p256k1 (uint64_t z[static 4], uint64_t p, uint64_t x[static 4]); | 756 | extern void bignum_optneg_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 596 | 757 | ||
| 597 | // Optionally negate modulo p_384, z := (-x) mod p_384 (if p nonzero) or z := x (if p zero), assuming x reduced | 758 | // Optionally negate modulo p_384, z := (-x) mod p_384 (if p nonzero) or z := x (if p zero), assuming x reduced |
| 598 | // Inputs p, x[6]; output z[6] | 759 | // Inputs p, x[6]; output z[6] |
| 599 | extern void bignum_optneg_p384 (uint64_t z[static 6], uint64_t p, uint64_t x[static 6]); | 760 | extern void bignum_optneg_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 600 | 761 | ||
| 601 | // Optionally negate modulo p_521, z := (-x) mod p_521 (if p nonzero) or z := x (if p zero), assuming x reduced | 762 | // Optionally negate modulo p_521, z := (-x) mod p_521 (if p nonzero) or z := x (if p zero), assuming x reduced |
| 602 | // Inputs p, x[9]; output z[9] | 763 | // Inputs p, x[9]; output z[9] |
| 603 | extern void bignum_optneg_p521 (uint64_t z[static 9], uint64_t p, uint64_t x[static 9]); | 764 | extern void bignum_optneg_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 765 | |||
| 766 | // Optionally negate modulo p_sm2, z := (-x) mod p_sm2 (if p nonzero) or z := x (if p zero), assuming x reduced | ||
| 767 | // Inputs p, x[4]; output z[4] | ||
| 768 | extern void bignum_optneg_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t p, const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 604 | 769 | ||
| 605 | // Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero) | 770 | // Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero) |
| 606 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] | 771 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] |
| 607 | extern uint64_t bignum_optsub (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y); | 772 | extern uint64_t bignum_optsub (uint64_t k, uint64_t *z, const uint64_t *x, uint64_t p, const uint64_t *y); |
| 608 | 773 | ||
| 609 | // Optionally subtract or add, z := x + sgn(p) * y interpreting p as signed | 774 | // Optionally subtract or add, z := x + sgn(p) * y interpreting p as signed |
| 610 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] | 775 | // Inputs x[k], p, y[k]; outputs function return (carry-out) and z[k] |
| 611 | extern uint64_t bignum_optsubadd (uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, uint64_t *y); | 776 | extern uint64_t bignum_optsubadd (uint64_t k, uint64_t *z, const uint64_t *x, uint64_t p, const uint64_t *y); |
| 612 | 777 | ||
| 613 | // Return bignum of power of 2, z := 2^n | 778 | // Return bignum of power of 2, z := 2^n |
| 614 | // Input n; output z[k] | 779 | // Input n; output z[k] |
| @@ -616,216 +781,376 @@ extern void bignum_pow2 (uint64_t k, uint64_t *z, uint64_t n); | |||
| 616 | 781 | ||
| 617 | // Shift bignum left by c < 64 bits z := x * 2^c | 782 | // Shift bignum left by c < 64 bits z := x * 2^c |
| 618 | // Inputs x[n], c; outputs function return (carry-out) and z[k] | 783 | // Inputs x[n], c; outputs function return (carry-out) and z[k] |
| 619 | extern uint64_t bignum_shl_small (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t c); | 784 | extern uint64_t bignum_shl_small (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t c); |
| 620 | 785 | ||
| 621 | // Shift bignum right by c < 64 bits z := floor(x / 2^c) | 786 | // Shift bignum right by c < 64 bits z := floor(x / 2^c) |
| 622 | // Inputs x[n], c; outputs function return (bits shifted out) and z[k] | 787 | // Inputs x[n], c; outputs function return (bits shifted out) and z[k] |
| 623 | extern uint64_t bignum_shr_small (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x, uint64_t c); | 788 | extern uint64_t bignum_shr_small (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x, uint64_t c); |
| 624 | 789 | ||
| 625 | // Square, z := x^2 | 790 | // Square, z := x^2 |
| 626 | // Input x[n]; output z[k] | 791 | // Input x[n]; output z[k] |
| 627 | extern void bignum_sqr (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); | 792 | extern void bignum_sqr (uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x); |
| 628 | 793 | ||
| 629 | // Square, z := x^2 | 794 | // Square, z := x^2 |
| 630 | // Input x[4]; output z[8] | 795 | // Input x[4]; output z[8] |
| 631 | extern void bignum_sqr_4_8 (uint64_t z[static 8], uint64_t x[static 4]); | 796 | extern void bignum_sqr_4_8 (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 632 | extern void bignum_sqr_4_8_alt (uint64_t z[static 8], uint64_t x[static 4]); | 797 | extern void bignum_sqr_4_8_alt (uint64_t z[S2N_BIGNUM_STATIC 8], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 633 | 798 | ||
| 634 | // Square, z := x^2 | 799 | // Square, z := x^2 |
| 635 | // Input x[6]; output z[12] | 800 | // Input x[6]; output z[12] |
| 636 | extern void bignum_sqr_6_12 (uint64_t z[static 12], uint64_t x[static 6]); | 801 | extern void bignum_sqr_6_12 (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 637 | extern void bignum_sqr_6_12_alt (uint64_t z[static 12], uint64_t x[static 6]); | 802 | extern void bignum_sqr_6_12_alt (uint64_t z[S2N_BIGNUM_STATIC 12], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 638 | 803 | ||
| 639 | // Square, z := x^2 | 804 | // Square, z := x^2 |
| 640 | // Input x[8]; output z[16] | 805 | // Input x[8]; output z[16] |
| 641 | extern void bignum_sqr_8_16 (uint64_t z[static 16], uint64_t x[static 8]); | 806 | extern void bignum_sqr_8_16 (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8]); |
| 642 | extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); | 807 | extern void bignum_sqr_8_16_alt (uint64_t z[S2N_BIGNUM_STATIC 16], const uint64_t x[S2N_BIGNUM_STATIC 8]); |
| 643 | 808 | ||
| 644 | // Square modulo p_25519, z := (x^2) mod p_25519 | 809 | // Square modulo p_25519, z := (x^2) mod p_25519 |
| 645 | // Input x[4]; output z[4] | 810 | // Input x[4]; output z[4] |
| 646 | extern void bignum_sqr_p25519 (uint64_t z[static 4], uint64_t x[static 4]); | 811 | extern void bignum_sqr_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 647 | extern void bignum_sqr_p25519_alt (uint64_t z[static 4], uint64_t x[static 4]); | 812 | extern void bignum_sqr_p25519_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 648 | 813 | ||
| 649 | // Square modulo p_256k1, z := (x^2) mod p_256k1 | 814 | // Square modulo p_256k1, z := (x^2) mod p_256k1 |
| 650 | // Input x[4]; output z[4] | 815 | // Input x[4]; output z[4] |
| 651 | extern void bignum_sqr_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 816 | extern void bignum_sqr_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 652 | extern void bignum_sqr_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); | 817 | extern void bignum_sqr_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 653 | 818 | ||
| 654 | // Square modulo p_521, z := (x^2) mod p_521, assuming x reduced | 819 | // Square modulo p_521, z := (x^2) mod p_521, assuming x reduced |
| 655 | // Input x[9]; output z[9] | 820 | // Input x[9]; output z[9] |
| 656 | extern void bignum_sqr_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 821 | extern void bignum_sqr_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 657 | extern void bignum_sqr_p521_alt (uint64_t z[static 9], uint64_t x[static 9]); | 822 | extern void bignum_sqr_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 823 | |||
| 824 | // Square root modulo p_25519 | ||
| 825 | // Input x[4]; output function return (Legendre symbol) and z[4] | ||
| 826 | extern int64_t bignum_sqrt_p25519(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 827 | extern int64_t bignum_sqrt_p25519_alt(uint64_t z[S2N_BIGNUM_STATIC 4],const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 658 | 828 | ||
| 659 | // Subtract, z := x - y | 829 | // Subtract, z := x - y |
| 660 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 830 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
| 661 | extern uint64_t bignum_sub (uint64_t p, uint64_t *z, uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | 831 | extern uint64_t bignum_sub (uint64_t p, uint64_t *z, uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); |
| 662 | 832 | ||
| 663 | // Subtract modulo p_25519, z := (x - y) mod p_25519, assuming x and y reduced | 833 | // Subtract modulo p_25519, z := (x - y) mod p_25519, assuming x and y reduced |
| 664 | // Inputs x[4], y[4]; output z[4] | 834 | // Inputs x[4], y[4]; output z[4] |
| 665 | extern void bignum_sub_p25519 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 835 | extern void bignum_sub_p25519 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 666 | 836 | ||
| 667 | // Subtract modulo p_256, z := (x - y) mod p_256, assuming x and y reduced | 837 | // Subtract modulo p_256, z := (x - y) mod p_256, assuming x and y reduced |
| 668 | // Inputs x[4], y[4]; output z[4] | 838 | // Inputs x[4], y[4]; output z[4] |
| 669 | extern void bignum_sub_p256 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 839 | extern void bignum_sub_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 670 | 840 | ||
| 671 | // Subtract modulo p_256k1, z := (x - y) mod p_256k1, assuming x and y reduced | 841 | // Subtract modulo p_256k1, z := (x - y) mod p_256k1, assuming x and y reduced |
| 672 | // Inputs x[4], y[4]; output z[4] | 842 | // Inputs x[4], y[4]; output z[4] |
| 673 | extern void bignum_sub_p256k1 (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); | 843 | extern void bignum_sub_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); |
| 674 | 844 | ||
| 675 | // Subtract modulo p_384, z := (x - y) mod p_384, assuming x and y reduced | 845 | // Subtract modulo p_384, z := (x - y) mod p_384, assuming x and y reduced |
| 676 | // Inputs x[6], y[6]; output z[6] | 846 | // Inputs x[6], y[6]; output z[6] |
| 677 | extern void bignum_sub_p384 (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); | 847 | extern void bignum_sub_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]); |
| 678 | 848 | ||
| 679 | // Subtract modulo p_521, z := (x - y) mod p_521, assuming x and y reduced | 849 | // Subtract modulo p_521, z := (x - y) mod p_521, assuming x and y reduced |
| 680 | // Inputs x[9], y[9]; output z[9] | 850 | // Inputs x[9], y[9]; output z[9] |
| 681 | extern void bignum_sub_p521 (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); | 851 | extern void bignum_sub_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9], const uint64_t y[S2N_BIGNUM_STATIC 9]); |
| 852 | |||
| 853 | // Subtract modulo p_sm2, z := (x - y) mod p_sm2, assuming x and y reduced | ||
| 854 | // Inputs x[4], y[4]; output z[4] | ||
| 855 | extern void bignum_sub_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4], const uint64_t y[S2N_BIGNUM_STATIC 4]); | ||
| 682 | 856 | ||
| 683 | // Convert 4-digit (256-bit) bignum to big-endian bytes | 857 | // Convert 4-digit (256-bit) bignum to big-endian bytes |
| 684 | // Input x[4]; output z[32] (bytes) | 858 | // Input x[4]; output z[32] (bytes) |
| 685 | extern void bignum_tobebytes_4 (uint8_t z[static 32], uint64_t x[static 4]); | 859 | extern void bignum_tobebytes_4 (uint8_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 686 | 860 | ||
| 687 | // Convert 6-digit (384-bit) bignum to big-endian bytes | 861 | // Convert 6-digit (384-bit) bignum to big-endian bytes |
| 688 | // Input x[6]; output z[48] (bytes) | 862 | // Input x[6]; output z[48] (bytes) |
| 689 | extern void bignum_tobebytes_6 (uint8_t z[static 48], uint64_t x[static 6]); | 863 | extern void bignum_tobebytes_6 (uint8_t z[S2N_BIGNUM_STATIC 48], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 690 | 864 | ||
| 691 | // Convert 4-digit (256-bit) bignum to little-endian bytes | 865 | // Convert 4-digit (256-bit) bignum to little-endian bytes |
| 692 | // Input x[4]; output z[32] (bytes) | 866 | // Input x[4]; output z[32] (bytes) |
| 693 | extern void bignum_tolebytes_4 (uint8_t z[static 32], uint64_t x[static 4]); | 867 | extern void bignum_tolebytes_4 (uint8_t z[S2N_BIGNUM_STATIC 32], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 694 | 868 | ||
| 695 | // Convert 6-digit (384-bit) bignum to little-endian bytes | 869 | // Convert 6-digit (384-bit) bignum to little-endian bytes |
| 696 | // Input x[6]; output z[48] (bytes) | 870 | // Input x[6]; output z[48] (bytes) |
| 697 | extern void bignum_tolebytes_6 (uint8_t z[static 48], uint64_t x[static 6]); | 871 | extern void bignum_tolebytes_6 (uint8_t z[S2N_BIGNUM_STATIC 48], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 698 | 872 | ||
| 699 | // Convert 9-digit 528-bit bignum to little-endian bytes | 873 | // Convert 9-digit 528-bit bignum to little-endian bytes |
| 700 | // Input x[9]; output z[66] (bytes) | 874 | // Input x[9]; output z[66] (bytes) |
| 701 | extern void bignum_tolebytes_p521 (uint8_t z[static 66], uint64_t x[static 9]); | 875 | extern void bignum_tolebytes_p521 (uint8_t z[S2N_BIGNUM_STATIC 66], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 702 | 876 | ||
| 703 | // Convert to Montgomery form z := (2^256 * x) mod p_256 | 877 | // Convert to Montgomery form z := (2^256 * x) mod p_256 |
| 704 | // Input x[4]; output z[4] | 878 | // Input x[4]; output z[4] |
| 705 | extern void bignum_tomont_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 879 | extern void bignum_tomont_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 706 | extern void bignum_tomont_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); | 880 | extern void bignum_tomont_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 707 | 881 | ||
| 708 | // Convert to Montgomery form z := (2^256 * x) mod p_256k1 | 882 | // Convert to Montgomery form z := (2^256 * x) mod p_256k1 |
| 709 | // Input x[4]; output z[4] | 883 | // Input x[4]; output z[4] |
| 710 | extern void bignum_tomont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 884 | extern void bignum_tomont_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 711 | extern void bignum_tomont_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); | 885 | extern void bignum_tomont_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 712 | 886 | ||
| 713 | // Convert to Montgomery form z := (2^384 * x) mod p_384 | 887 | // Convert to Montgomery form z := (2^384 * x) mod p_384 |
| 714 | // Input x[6]; output z[6] | 888 | // Input x[6]; output z[6] |
| 715 | extern void bignum_tomont_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 889 | extern void bignum_tomont_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 716 | extern void bignum_tomont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); | 890 | extern void bignum_tomont_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 717 | 891 | ||
| 718 | // Convert to Montgomery form z := (2^576 * x) mod p_521 | 892 | // Convert to Montgomery form z := (2^576 * x) mod p_521 |
| 719 | // Input x[9]; output z[9] | 893 | // Input x[9]; output z[9] |
| 720 | extern void bignum_tomont_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 894 | extern void bignum_tomont_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 895 | |||
| 896 | // Convert to Montgomery form z := (2^256 * x) mod p_sm2 | ||
| 897 | // Input x[4]; output z[4] | ||
| 898 | extern void bignum_tomont_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 721 | 899 | ||
| 722 | // Triple modulo p_256, z := (3 * x) mod p_256 | 900 | // Triple modulo p_256, z := (3 * x) mod p_256 |
| 723 | // Input x[4]; output z[4] | 901 | // Input x[4]; output z[4] |
| 724 | extern void bignum_triple_p256 (uint64_t z[static 4], uint64_t x[static 4]); | 902 | extern void bignum_triple_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 725 | extern void bignum_triple_p256_alt (uint64_t z[static 4], uint64_t x[static 4]); | 903 | extern void bignum_triple_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 726 | 904 | ||
| 727 | // Triple modulo p_256k1, z := (3 * x) mod p_256k1 | 905 | // Triple modulo p_256k1, z := (3 * x) mod p_256k1 |
| 728 | // Input x[4]; output z[4] | 906 | // Input x[4]; output z[4] |
| 729 | extern void bignum_triple_p256k1 (uint64_t z[static 4], uint64_t x[static 4]); | 907 | extern void bignum_triple_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 730 | extern void bignum_triple_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]); | 908 | extern void bignum_triple_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); |
| 731 | 909 | ||
| 732 | // Triple modulo p_384, z := (3 * x) mod p_384 | 910 | // Triple modulo p_384, z := (3 * x) mod p_384 |
| 733 | // Input x[6]; output z[6] | 911 | // Input x[6]; output z[6] |
| 734 | extern void bignum_triple_p384 (uint64_t z[static 6], uint64_t x[static 6]); | 912 | extern void bignum_triple_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 735 | extern void bignum_triple_p384_alt (uint64_t z[static 6], uint64_t x[static 6]); | 913 | extern void bignum_triple_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]); |
| 736 | 914 | ||
| 737 | // Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced | 915 | // Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced |
| 738 | // Input x[9]; output z[9] | 916 | // Input x[9]; output z[9] |
| 739 | extern void bignum_triple_p521 (uint64_t z[static 9], uint64_t x[static 9]); | 917 | extern void bignum_triple_p521 (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 740 | extern void bignum_triple_p521_alt (uint64_t z[static 9], uint64_t x[static 9]); | 918 | extern void bignum_triple_p521_alt (uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]); |
| 919 | |||
| 920 | // Triple modulo p_sm2, z := (3 * x) mod p_sm2 | ||
| 921 | // Input x[4]; output z[4] | ||
| 922 | extern void bignum_triple_sm2 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 923 | extern void bignum_triple_sm2_alt (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); | ||
| 741 | 924 | ||
| 742 | // Montgomery ladder step for curve25519 | 925 | // Montgomery ladder step for curve25519 |
| 743 | // Inputs point[8], pp[16], b; output rr[16] | 926 | // Inputs point[8], pp[16], b; output rr[16] |
| 744 | extern void curve25519_ladderstep(uint64_t rr[16],uint64_t point[8],uint64_t pp[16],uint64_t b); | 927 | extern void curve25519_ladderstep(uint64_t rr[16],const uint64_t point[8],const uint64_t pp[16],uint64_t b); |
| 745 | extern void curve25519_ladderstep_alt(uint64_t rr[16],uint64_t point[8],uint64_t pp[16],uint64_t b); | 928 | extern void curve25519_ladderstep_alt(uint64_t rr[16],const uint64_t point[8],const uint64_t pp[16],uint64_t b); |
| 746 | 929 | ||
| 747 | // Projective scalar multiplication, x coordinate only, for curve25519 | 930 | // Projective scalar multiplication, x coordinate only, for curve25519 |
| 748 | // Inputs scalar[4], point[4]; output res[8] | 931 | // Inputs scalar[4], point[4]; output res[8] |
| 749 | extern void curve25519_pxscalarmul(uint64_t res[static 8],uint64_t scalar[static 4],uint64_t point[static 4]); | 932 | extern void curve25519_pxscalarmul(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]); |
| 750 | extern void curve25519_pxscalarmul_alt(uint64_t res[static 8],uint64_t scalar[static 4],uint64_t point[static 4]); | 933 | extern void curve25519_pxscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]); |
| 751 | 934 | ||
| 752 | // x25519 function for curve25519 | 935 | // x25519 function for curve25519 |
| 753 | // Inputs scalar[4], point[4]; output res[4] | 936 | // Inputs scalar[4], point[4]; output res[4] |
| 754 | extern void curve25519_x25519(uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]); | 937 | extern void curve25519_x25519(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]); |
| 755 | extern void curve25519_x25519_alt(uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]); | 938 | extern void curve25519_x25519_alt(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 4]); |
| 939 | |||
| 940 | // x25519 function for curve25519 (byte array arguments) | ||
| 941 | // Inputs scalar[32] (bytes), point[32] (bytes); output res[32] (bytes) | ||
| 942 | extern void curve25519_x25519_byte(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32],const uint8_t point[S2N_BIGNUM_STATIC 32]); | ||
| 943 | extern void curve25519_x25519_byte_alt(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32],const uint8_t point[S2N_BIGNUM_STATIC 32]); | ||
| 756 | 944 | ||
| 757 | // x25519 function for curve25519 on base element 9 | 945 | // x25519 function for curve25519 on base element 9 |
| 758 | // Input scalar[4]; output res[4] | 946 | // Input scalar[4]; output res[4] |
| 759 | extern void curve25519_x25519base(uint64_t res[static 4],uint64_t scalar[static 4]); | 947 | extern void curve25519_x25519base(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4]); |
| 760 | extern void curve25519_x25519base_alt(uint64_t res[static 4],uint64_t scalar[static 4]); | 948 | extern void curve25519_x25519base_alt(uint64_t res[S2N_BIGNUM_STATIC 4],const uint64_t scalar[S2N_BIGNUM_STATIC 4]); |
| 949 | |||
| 950 | // x25519 function for curve25519 on base element 9 (byte array arguments) | ||
| 951 | // Input scalar[32] (bytes); output res[32] (bytes) | ||
| 952 | extern void curve25519_x25519base_byte(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32]); | ||
| 953 | extern void curve25519_x25519base_byte_alt(uint8_t res[S2N_BIGNUM_STATIC 32],const uint8_t scalar[S2N_BIGNUM_STATIC 32]); | ||
| 954 | |||
| 955 | // Decode compressed 256-bit form of edwards25519 point | ||
| 956 | // Input c[32] (bytes); output function return and z[8] | ||
| 957 | extern uint64_t edwards25519_decode(uint64_t z[S2N_BIGNUM_STATIC 8], const uint8_t c[S2N_BIGNUM_STATIC 32]); | ||
| 958 | extern uint64_t edwards25519_decode_alt(uint64_t z[S2N_BIGNUM_STATIC 8], const uint8_t c[S2N_BIGNUM_STATIC 32]); | ||
| 959 | |||
| 960 | // Encode edwards25519 point into compressed form as 256-bit number | ||
| 961 | // Input p[8]; output z[32] (bytes) | ||
| 962 | extern void edwards25519_encode(uint8_t z[S2N_BIGNUM_STATIC 32], const uint64_t p[S2N_BIGNUM_STATIC 8]); | ||
| 761 | 963 | ||
| 762 | // Extended projective addition for edwards25519 | 964 | // Extended projective addition for edwards25519 |
| 763 | // Inputs p1[16], p2[16]; output p3[16] | 965 | // Inputs p1[16], p2[16]; output p3[16] |
| 764 | extern void edwards25519_epadd(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 16]); | 966 | extern void edwards25519_epadd(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 16]); |
| 765 | extern void edwards25519_epadd_alt(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 16]); | 967 | extern void edwards25519_epadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 16]); |
| 766 | 968 | ||
| 767 | // Extended projective doubling for edwards25519 | 969 | // Extended projective doubling for edwards25519 |
| 768 | // Input p1[12]; output p3[16] | 970 | // Input p1[12]; output p3[16] |
| 769 | extern void edwards25519_epdouble(uint64_t p3[static 16],uint64_t p1[static 12]); | 971 | extern void edwards25519_epdouble(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
| 770 | extern void edwards25519_epdouble_alt(uint64_t p3[static 16],uint64_t p1[static 12]); | 972 | extern void edwards25519_epdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
| 771 | 973 | ||
| 772 | // Projective doubling for edwards25519 | 974 | // Projective doubling for edwards25519 |
| 773 | // Input p1[12]; output p3[12] | 975 | // Input p1[12]; output p3[12] |
| 774 | extern void edwards25519_pdouble(uint64_t p3[static 12],uint64_t p1[static 12]); | 976 | extern void edwards25519_pdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
| 775 | extern void edwards25519_pdouble_alt(uint64_t p3[static 12],uint64_t p1[static 12]); | 977 | extern void edwards25519_pdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
| 776 | 978 | ||
| 777 | // Extended projective + precomputed mixed addition for edwards25519 | 979 | // Extended projective + precomputed mixed addition for edwards25519 |
| 778 | // Inputs p1[16], p2[12]; output p3[16] | 980 | // Inputs p1[16], p2[12]; output p3[16] |
| 779 | extern void edwards25519_pepadd(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 12]); | 981 | extern void edwards25519_pepadd(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 12]); |
| 780 | extern void edwards25519_pepadd_alt(uint64_t p3[static 16],uint64_t p1[static 16],uint64_t p2[static 12]); | 982 | extern void edwards25519_pepadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 16],const uint64_t p1[S2N_BIGNUM_STATIC 16],const uint64_t p2[S2N_BIGNUM_STATIC 12]); |
| 983 | |||
| 984 | // Scalar multiplication by standard basepoint for edwards25519 (Ed25519) | ||
| 985 | // Input scalar[4]; output res[8] | ||
| 986 | extern void edwards25519_scalarmulbase(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4]); | ||
| 987 | extern void edwards25519_scalarmulbase_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4]); | ||
| 988 | |||
| 989 | // Double scalar multiplication for edwards25519, fresh and base point | ||
| 990 | // Inputs scalar[4], point[8], bscalar[4]; output res[8] | ||
| 991 | extern void edwards25519_scalarmuldouble(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4], const uint64_t point[S2N_BIGNUM_STATIC 8],const uint64_t bscalar[S2N_BIGNUM_STATIC 4]); | ||
| 992 | extern void edwards25519_scalarmuldouble_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4], const uint64_t point[S2N_BIGNUM_STATIC 8],const uint64_t bscalar[S2N_BIGNUM_STATIC 4]); | ||
| 993 | |||
| 994 | // Scalar product of 2-element polynomial vectors in NTT domain, with mulcache | ||
| 995 | // Inputs a[512], b[512], bt[256] (signed 16-bit words); output r[256] (signed 16-bit words) | ||
| 996 | extern void mlkem_basemul_k2(int16_t r[S2N_BIGNUM_STATIC 256],const int16_t a[S2N_BIGNUM_STATIC 512],const int16_t b[S2N_BIGNUM_STATIC 512],const int16_t bt[S2N_BIGNUM_STATIC 256]); | ||
| 997 | |||
| 998 | // Scalar product of 3-element polynomial vectors in NTT domain, with mulcache | ||
| 999 | // Inputs a[768], b[768], bt[384] (signed 16-bit words); output r[256] (signed 16-bit words) | ||
| 1000 | extern void mlkem_basemul_k3(int16_t r[S2N_BIGNUM_STATIC 256],const int16_t a[S2N_BIGNUM_STATIC 768],const int16_t b[S2N_BIGNUM_STATIC 768],const int16_t bt[S2N_BIGNUM_STATIC 384]); | ||
| 1001 | |||
| 1002 | // Scalar product of 4-element polynomial vectors in NTT domain, with mulcache | ||
| 1003 | // Inputs a[1024], b[1024], bt[512] (signed 16-bit words); output r[256] (signed 16-bit words) | ||
| 1004 | extern void mlkem_basemul_k4(int16_t r[S2N_BIGNUM_STATIC 256],const int16_t a[S2N_BIGNUM_STATIC 1024],const int16_t b[S2N_BIGNUM_STATIC 1024],const int16_t bt[S2N_BIGNUM_STATIC 512]); | ||
| 1005 | |||
| 1006 | // Inverse number-theoretic transform from ML-KEM | ||
| 1007 | // Inputs a[256] (signed 16-bit words), z_01234[80] (signed 16-bit words), z_56[384] (signed 16-bit words); output a[256] (signed 16-bit words) | ||
| 1008 | extern void mlkem_intt(int16_t a[S2N_BIGNUM_STATIC 256],const int16_t z_01234[S2N_BIGNUM_STATIC 80],const int16_t z_56[S2N_BIGNUM_STATIC 384]); | ||
| 1009 | |||
| 1010 | // Precompute the mulcache data for a polynomial in the NTT domain | ||
| 1011 | // Inputs a[256], z[128] and t[128] (signed 16-bit words); output x[128] (signed 16-bit words) | ||
| 1012 | extern void mlkem_mulcache_compute(int16_t x[S2N_BIGNUM_STATIC 128],const int16_t a[S2N_BIGNUM_STATIC 256],const int16_t z[S2N_BIGNUM_STATIC 128],const int16_t t[S2N_BIGNUM_STATIC 128]); | ||
| 1013 | |||
| 1014 | // Forward number-theoretic transform from ML-KEM | ||
| 1015 | // Inputs a[256] (signed 16-bit words), z_01234[80] (signed 16-bit words), z_56[384] (signed 16-bit words); output a[256] (signed 16-bit words) | ||
| 1016 | extern void mlkem_ntt(int16_t a[S2N_BIGNUM_STATIC 256],const int16_t z_01234[S2N_BIGNUM_STATIC 80],const int16_t z_56[S2N_BIGNUM_STATIC 384]); | ||
| 1017 | |||
| 1018 | // Canonical modular reduction of polynomial coefficients for ML-KEM | ||
| 1019 | // Input a[256] (signed 16-bit words); output a[256] (signed 16-bit words) | ||
| 1020 | extern void mlkem_reduce(int16_t a[S2N_BIGNUM_STATIC 256]); | ||
| 1021 | |||
| 1022 | // Pack ML-KEM polynomial coefficients as 12-bit numbers | ||
| 1023 | // Input a[256] (signed 16-bit words); output r[384] (bytes) | ||
| 1024 | extern void mlkem_tobytes(uint8_t r[S2N_BIGNUM_STATIC 384],const int16_t a[S2N_BIGNUM_STATIC 256]); | ||
| 1025 | |||
| 1026 | // Conversion of ML-KEM polynomial coefficients to Montgomery form | ||
| 1027 | // Input a[256] (signed 16-bit words); output a[256] (signed 16-bit words) | ||
| 1028 | extern void mlkem_tomont(int16_t a[S2N_BIGNUM_STATIC 256]); | ||
| 1029 | |||
| 1030 | // Uniform rejection sampling for ML-KEM | ||
| 1031 | // Inputs *buf (unsigned bytes), buflen, table (unsigned bytes); output r[256] (signed 16-bit words), return | ||
| 1032 | extern uint64_t mlkem_rej_uniform_VARIABLE_TIME(int16_t r[S2N_BIGNUM_STATIC 256],const uint8_t *buf,uint64_t buflen,const uint8_t *table); | ||
| 781 | 1033 | ||
| 782 | // Point addition on NIST curve P-256 in Montgomery-Jacobian coordinates | 1034 | // Point addition on NIST curve P-256 in Montgomery-Jacobian coordinates |
| 783 | // Inputs p1[12], p2[12]; output p3[12] | 1035 | // Inputs p1[12], p2[12]; output p3[12] |
| 784 | extern void p256_montjadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 12]); | 1036 | extern void p256_montjadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); |
| 1037 | extern void p256_montjadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); | ||
| 785 | 1038 | ||
| 786 | // Point doubling on NIST curve P-256 in Montgomery-Jacobian coordinates | 1039 | // Point doubling on NIST curve P-256 in Montgomery-Jacobian coordinates |
| 787 | // Input p1[12]; output p3[12] | 1040 | // Input p1[12]; output p3[12] |
| 788 | extern void p256_montjdouble(uint64_t p3[static 12],uint64_t p1[static 12]); | 1041 | extern void p256_montjdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
| 1042 | extern void p256_montjdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); | ||
| 789 | 1043 | ||
| 790 | // Point mixed addition on NIST curve P-256 in Montgomery-Jacobian coordinates | 1044 | // Point mixed addition on NIST curve P-256 in Montgomery-Jacobian coordinates |
| 791 | // Inputs p1[12], p2[8]; output p3[12] | 1045 | // Inputs p1[12], p2[8]; output p3[12] |
| 792 | extern void p256_montjmixadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 8]); | 1046 | extern void p256_montjmixadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); |
| 1047 | extern void p256_montjmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); | ||
| 1048 | |||
| 1049 | // Montgomery-Jacobian form scalar multiplication for P-256 | ||
| 1050 | // Inputs scalar[4], point[12]; output res[12] | ||
| 1051 | extern void p256_montjscalarmul(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]); | ||
| 1052 | extern void p256_montjscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]); | ||
| 1053 | |||
| 1054 | // Scalar multiplication for NIST curve P-256 | ||
| 1055 | // Inputs scalar[4], point[8]; output res[8] | ||
| 1056 | extern void p256_scalarmul(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 8]); | ||
| 1057 | extern void p256_scalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 8]); | ||
| 1058 | |||
| 1059 | // Scalar multiplication for precomputed point on NIST curve P-256 | ||
| 1060 | // Inputs scalar[4], blocksize, table[]; output res[8] | ||
| 1061 | extern void p256_scalarmulbase(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],uint64_t blocksize,const uint64_t *table); | ||
| 1062 | extern void p256_scalarmulbase_alt(uint64_t res[S2N_BIGNUM_STATIC 8],const uint64_t scalar[S2N_BIGNUM_STATIC 4],uint64_t blocksize,const uint64_t *table); | ||
| 793 | 1063 | ||
| 794 | // Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates | 1064 | // Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates |
| 795 | // Inputs p1[18], p2[18]; output p3[18] | 1065 | // Inputs p1[18], p2[18]; output p3[18] |
| 796 | extern void p384_montjadd(uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 18]); | 1066 | extern void p384_montjadd(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 18]); |
| 1067 | extern void p384_montjadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 18]); | ||
| 797 | 1068 | ||
| 798 | // Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates | 1069 | // Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates |
| 799 | // Input p1[18]; output p3[18] | 1070 | // Input p1[18]; output p3[18] |
| 800 | extern void p384_montjdouble(uint64_t p3[static 18],uint64_t p1[static 18]); | 1071 | extern void p384_montjdouble(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18]); |
| 1072 | extern void p384_montjdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18]); | ||
| 801 | 1073 | ||
| 802 | // Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates | 1074 | // Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates |
| 803 | // Inputs p1[18], p2[12]; output p3[18] | 1075 | // Inputs p1[18], p2[12]; output p3[18] |
| 804 | extern void p384_montjmixadd(uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 12]); | 1076 | extern void p384_montjmixadd(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 12]); |
| 1077 | extern void p384_montjmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 18],const uint64_t p1[S2N_BIGNUM_STATIC 18],const uint64_t p2[S2N_BIGNUM_STATIC 12]); | ||
| 1078 | |||
| 1079 | // Montgomery-Jacobian form scalar multiplication for P-384 | ||
| 1080 | // Inputs scalar[6], point[18]; output res[18] | ||
| 1081 | extern void p384_montjscalarmul(uint64_t res[S2N_BIGNUM_STATIC 18],const uint64_t scalar[S2N_BIGNUM_STATIC 6],const uint64_t point[S2N_BIGNUM_STATIC 18]); | ||
| 1082 | extern void p384_montjscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 18],const uint64_t scalar[S2N_BIGNUM_STATIC 6],const uint64_t point[S2N_BIGNUM_STATIC 18]); | ||
| 805 | 1083 | ||
| 806 | // Point addition on NIST curve P-521 in Jacobian coordinates | 1084 | // Point addition on NIST curve P-521 in Jacobian coordinates |
| 807 | // Inputs p1[27], p2[27]; output p3[27] | 1085 | // Inputs p1[27], p2[27]; output p3[27] |
| 808 | extern void p521_jadd(uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 27]); | 1086 | extern void p521_jadd(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 27]); |
| 1087 | extern void p521_jadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 27]); | ||
| 809 | 1088 | ||
| 810 | // Point doubling on NIST curve P-521 in Jacobian coordinates | 1089 | // Point doubling on NIST curve P-521 in Jacobian coordinates |
| 811 | // Input p1[27]; output p3[27] | 1090 | // Input p1[27]; output p3[27] |
| 812 | extern void p521_jdouble(uint64_t p3[static 27],uint64_t p1[static 27]); | 1091 | extern void p521_jdouble(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27]); |
| 1092 | extern void p521_jdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27]); | ||
| 813 | 1093 | ||
| 814 | // Point mixed addition on NIST curve P-521 in Jacobian coordinates | 1094 | // Point mixed addition on NIST curve P-521 in Jacobian coordinates |
| 815 | // Inputs p1[27], p2[18]; output p3[27] | 1095 | // Inputs p1[27], p2[18]; output p3[27] |
| 816 | extern void p521_jmixadd(uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 18]); | 1096 | extern void p521_jmixadd(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 18]); |
| 1097 | extern void p521_jmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 27],const uint64_t p1[S2N_BIGNUM_STATIC 27],const uint64_t p2[S2N_BIGNUM_STATIC 18]); | ||
| 1098 | |||
| 1099 | // Jacobian form scalar multiplication for P-521 | ||
| 1100 | // Inputs scalar[9], point[27]; output res[27] | ||
| 1101 | extern void p521_jscalarmul(uint64_t res[S2N_BIGNUM_STATIC 27],const uint64_t scalar[S2N_BIGNUM_STATIC 9],const uint64_t point[S2N_BIGNUM_STATIC 27]); | ||
| 1102 | extern void p521_jscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 27],const uint64_t scalar[S2N_BIGNUM_STATIC 9],const uint64_t point[S2N_BIGNUM_STATIC 27]); | ||
| 817 | 1103 | ||
| 818 | // Point addition on SECG curve secp256k1 in Jacobian coordinates | 1104 | // Point addition on SECG curve secp256k1 in Jacobian coordinates |
| 819 | // Inputs p1[12], p2[12]; output p3[12] | 1105 | // Inputs p1[12], p2[12]; output p3[12] |
| 820 | extern void secp256k1_jadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 12]); | 1106 | extern void secp256k1_jadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); |
| 1107 | extern void secp256k1_jadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); | ||
| 821 | 1108 | ||
| 822 | // Point doubling on SECG curve secp256k1 in Jacobian coordinates | 1109 | // Point doubling on SECG curve secp256k1 in Jacobian coordinates |
| 823 | // Input p1[12]; output p3[12] | 1110 | // Input p1[12]; output p3[12] |
| 824 | extern void secp256k1_jdouble(uint64_t p3[static 12],uint64_t p1[static 12]); | 1111 | extern void secp256k1_jdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); |
| 1112 | extern void secp256k1_jdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); | ||
| 825 | 1113 | ||
| 826 | // Point mixed addition on SECG curve secp256k1 in Jacobian coordinates | 1114 | // Point mixed addition on SECG curve secp256k1 in Jacobian coordinates |
| 827 | // Inputs p1[12], p2[8]; output p3[12] | 1115 | // Inputs p1[12], p2[8]; output p3[12] |
| 828 | extern void secp256k1_jmixadd(uint64_t p3[static 12],uint64_t p1[static 12],uint64_t p2[static 8]); | 1116 | extern void secp256k1_jmixadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); |
| 1117 | extern void secp256k1_jmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); | ||
| 1118 | |||
| 1119 | // Keccak-f1600 permutation for SHA3 | ||
| 1120 | // Inputs a[25], rc[24]; output a[25] | ||
| 1121 | extern void sha3_keccak_f1600(uint64_t a[S2N_BIGNUM_STATIC 25],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
| 1122 | extern void sha3_keccak_f1600_alt(uint64_t a[S2N_BIGNUM_STATIC 25],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
| 1123 | |||
| 1124 | // Batched 2-way Keccak-f1600 permutation for SHA3 | ||
| 1125 | // Inputs a[50], rc[24]; output a[50] | ||
| 1126 | extern void sha3_keccak2_f1600(uint64_t a[S2N_BIGNUM_STATIC 50],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
| 1127 | extern void sha3_keccak2_f1600_alt(uint64_t a[S2N_BIGNUM_STATIC 50],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
| 1128 | |||
| 1129 | // Batched 4-way Keccak-f1600 permutation for SHA3 | ||
| 1130 | // Inputs a[100], rc[24]; output a[100] | ||
| 1131 | extern void sha3_keccak4_f1600(uint64_t a[S2N_BIGNUM_STATIC 100],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
| 1132 | extern void sha3_keccak4_f1600_alt(uint64_t a[S2N_BIGNUM_STATIC 100],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
| 1133 | extern void sha3_keccak4_f1600_alt2(uint64_t a[S2N_BIGNUM_STATIC 100],const uint64_t rc[S2N_BIGNUM_STATIC 24]); | ||
| 1134 | |||
| 1135 | // Point addition on CC curve SM2 in Montgomery-Jacobian coordinates | ||
| 1136 | // Inputs p1[12], p2[12]; output p3[12] | ||
| 1137 | extern void sm2_montjadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); | ||
| 1138 | extern void sm2_montjadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 12]); | ||
| 1139 | |||
| 1140 | // Point doubling on CC curve SM2 in Montgomery-Jacobian coordinates | ||
| 1141 | // Input p1[12]; output p3[12] | ||
| 1142 | extern void sm2_montjdouble(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); | ||
| 1143 | extern void sm2_montjdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12]); | ||
| 1144 | |||
| 1145 | // Point mixed addition on CC curve SM2 in Montgomery-Jacobian coordinates | ||
| 1146 | // Inputs p1[12], p2[8]; output p3[12] | ||
| 1147 | extern void sm2_montjmixadd(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); | ||
| 1148 | extern void sm2_montjmixadd_alt(uint64_t p3[S2N_BIGNUM_STATIC 12],const uint64_t p1[S2N_BIGNUM_STATIC 12],const uint64_t p2[S2N_BIGNUM_STATIC 8]); | ||
| 1149 | |||
| 1150 | // Montgomery-Jacobian form scalar multiplication for CC curve SM2 | ||
| 1151 | // Inputs scalar[4], point[12]; output res[12] | ||
| 1152 | extern void sm2_montjscalarmul(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]); | ||
| 1153 | extern void sm2_montjscalarmul_alt(uint64_t res[S2N_BIGNUM_STATIC 12],const uint64_t scalar[S2N_BIGNUM_STATIC 4],const uint64_t point[S2N_BIGNUM_STATIC 12]); | ||
| 829 | 1154 | ||
| 830 | // Reverse the bytes in a single word | 1155 | // Reverse the bytes in a single word |
| 831 | // Input a; output function return | 1156 | // Input a; output function return |
| @@ -839,6 +1164,10 @@ extern uint64_t word_clz (uint64_t a); | |||
| 839 | // Input a; output function return | 1164 | // Input a; output function return |
| 840 | extern uint64_t word_ctz (uint64_t a); | 1165 | extern uint64_t word_ctz (uint64_t a); |
| 841 | 1166 | ||
| 1167 | // Perform 59 "divstep" iterations and return signed matrix of updates | ||
| 1168 | // Inputs d, f, g; output m[2][2] and function return | ||
| 1169 | extern int64_t word_divstep59(int64_t m[2][2],int64_t d,uint64_t f,uint64_t g); | ||
| 1170 | |||
| 842 | // Return maximum of two unsigned 64-bit words | 1171 | // Return maximum of two unsigned 64-bit words |
| 843 | // Inputs a, b; output function return | 1172 | // Inputs a, b; output function return |
| 844 | extern uint64_t word_max (uint64_t a, uint64_t b); | 1173 | extern uint64_t word_max (uint64_t a, uint64_t b); |
| @@ -851,6 +1180,10 @@ extern uint64_t word_min (uint64_t a, uint64_t b); | |||
| 851 | // Input a; output function return | 1180 | // Input a; output function return |
| 852 | extern uint64_t word_negmodinv (uint64_t a); | 1181 | extern uint64_t word_negmodinv (uint64_t a); |
| 853 | 1182 | ||
| 1183 | // Count number of set bits in a single 64-bit word (population count) | ||
| 1184 | // Input a; output function return | ||
| 1185 | extern uint64_t word_popcount (uint64_t a); | ||
| 1186 | |||
| 854 | // Single-word reciprocal, 2^64 + ret = ceil(2^128/a) - 1 if MSB of "a" is set | 1187 | // Single-word reciprocal, 2^64 + ret = ceil(2^128/a) - 1 if MSB of "a" is set |
| 855 | // Input a; output function return | 1188 | // Input a; output function return |
| 856 | extern uint64_t word_recip (uint64_t a); | 1189 | extern uint64_t word_recip (uint64_t a); |
diff --git a/src/lib/libcrypto/bn/s2n_bignum_internal.h b/src/lib/libcrypto/bn/s2n_bignum_internal.h index b82db7d019..37eebb4fd6 100644 --- a/src/lib/libcrypto/bn/s2n_bignum_internal.h +++ b/src/lib/libcrypto/bn/s2n_bignum_internal.h | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | // $OpenBSD: s2n_bignum_internal.h,v 1.5 2025/08/12 10:01:37 jsing Exp $ | ||
| 2 | // | ||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | // | 4 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | 5 | // Permission to use, copy, modify, and/or distribute this software for any |
| @@ -14,14 +16,14 @@ | |||
| 14 | 16 | ||
| 15 | #ifdef __APPLE__ | 17 | #ifdef __APPLE__ |
| 16 | # define S2N_BN_SYMBOL(NAME) _##NAME | 18 | # define S2N_BN_SYMBOL(NAME) _##NAME |
| 19 | # if defined(__AARCH64EL__) || defined(__ARMEL__) | ||
| 20 | # define __LF %% | ||
| 21 | # else | ||
| 22 | # define __LF ; | ||
| 23 | # endif | ||
| 17 | #else | 24 | #else |
| 18 | # define S2N_BN_SYMBOL(name) name | 25 | # define S2N_BN_SYMBOL(name) name |
| 19 | #endif | 26 | # define __LF ; |
| 20 | |||
| 21 | #ifdef __CET__ | ||
| 22 | # include <cet.h> | ||
| 23 | #else | ||
| 24 | # define _CET_ENDBR | ||
| 25 | #endif | 27 | #endif |
| 26 | 28 | ||
| 27 | #define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl S2N_BN_SYMBOL(name) | 29 | #define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl S2N_BN_SYMBOL(name) |
| @@ -34,3 +36,24 @@ | |||
| 34 | #else | 36 | #else |
| 35 | # define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ | 37 | # define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ |
| 36 | #endif | 38 | #endif |
| 39 | |||
| 40 | // Enable indirect branch tracking support unless explicitly disabled | ||
| 41 | // with -DNO_IBT. If the platform supports CET, simply inherit this from | ||
| 42 | // the usual header. Otherwise manually define _CET_ENDBR, used at each | ||
| 43 | // x86 entry point, to be the ENDBR64 instruction, with an explicit byte | ||
| 44 | // sequence for compilers/assemblers that don't know about it. Note that | ||
| 45 | // it is safe to use ENDBR64 on all platforms, since the encoding is by | ||
| 46 | // design interpreted as a NOP on all pre-CET x86_64 processors. The only | ||
| 47 | // downside is a small increase in code size and potentially a modest | ||
| 48 | // slowdown from executing one more instruction. | ||
| 49 | |||
| 50 | #if NO_IBT | ||
| 51 | # if defined(_CET_ENDBR) | ||
| 52 | # error "The s2n-bignum build option NO_IBT was configured, but _CET_ENDBR is defined in this compilation unit. That is weird, so failing the build." | ||
| 53 | # endif | ||
| 54 | # define _CET_ENDBR | ||
| 55 | #elif defined(__CET__) | ||
| 56 | # include <cet.h> | ||
| 57 | #elif !defined(_CET_ENDBR) | ||
| 58 | # define _CET_ENDBR .byte 0xf3,0x0f,0x1e,0xfa | ||
| 59 | #endif | ||
