diff options
Diffstat (limited to 'src/lib/libcrypto/bn/arch/amd64')
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_add.S | 165 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S | 155 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S | 138 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_mul.S | 167 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S | 157 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S | 244 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S | 197 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S | 145 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S | 242 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sub.S | 153 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bn_arch.c | 131 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bn_arch.h | 109 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/word_clz.S | 60 |
13 files changed, 0 insertions, 2063 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S deleted file mode 100644 index 5fe4aae7a1..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S +++ /dev/null | |||
| @@ -1,165 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Add, z := x + y | ||
| 17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | ||
| 18 | // | ||
| 19 | // extern uint64_t bignum_add | ||
| 20 | // (uint64_t p, uint64_t *z, | ||
| 21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
| 22 | // | ||
| 23 | // Does the z := x + y operation, truncating modulo p words in general and | ||
| 24 | // returning a top carry (0 or 1) in the p'th place, only adding the input | ||
| 25 | // words below p (as well as m and n respectively) to get the sum and carry. | ||
| 26 | // | ||
| 27 | // Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX | ||
| 28 | // Microsoft x64 ABI: RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX | ||
| 29 | // ---------------------------------------------------------------------------- | ||
| 30 | |||
| 31 | #include "s2n_bignum_internal.h" | ||
| 32 | |||
| 33 | .intel_syntax noprefix | ||
| 34 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add) | ||
| 35 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add) | ||
| 36 | .text | ||
| 37 | |||
| 38 | #define p rdi | ||
| 39 | #define z rsi | ||
| 40 | #define m rdx | ||
| 41 | #define x rcx | ||
| 42 | #define n r8 | ||
| 43 | #define y r9 | ||
| 44 | #define i r10 | ||
| 45 | #define a rax | ||
| 46 | |||
| 47 | #define ashort eax | ||
| 48 | |||
| 49 | |||
| 50 | |||
| 51 | S2N_BN_SYMBOL(bignum_add): | ||
| 52 | _CET_ENDBR | ||
| 53 | |||
| 54 | #if WINDOWS_ABI | ||
| 55 | push rdi | ||
| 56 | push rsi | ||
| 57 | mov rdi, rcx | ||
| 58 | mov rsi, rdx | ||
| 59 | mov rdx, r8 | ||
| 60 | mov rcx, r9 | ||
| 61 | mov r8, [rsp+56] | ||
| 62 | mov r9, [rsp+64] | ||
| 63 | #endif | ||
| 64 | |||
| 65 | // Zero the main index counter i, which both branches below start from | ||
| 66 | |||
| 67 | xor i, i | ||
| 68 | |||
| 69 | // First clamp the two input sizes m := min(p,m) and n := min(p,n) since | ||
| 70 | // we'll never need words past the p'th. Can now assume m <= p and n <= p. | ||
| 71 | // Then compare the clamped m and n, branching to ylonger when m < n | ||
| 72 | |||
| 73 | cmp p, m | ||
| 74 | cmovc m, p | ||
| 75 | cmp p, n | ||
| 76 | cmovc n, p | ||
| 77 | cmp m, n | ||
| 78 | jc ylonger | ||
| 79 | |||
| 80 | // The case where x is longer or of the same size (p >= m >= n); p becomes p - m (tail length) and m becomes m - n + 1 (biased by one for the dec at xtest) | ||
| 81 | |||
| 82 | sub p, m | ||
| 83 | sub m, n | ||
| 84 | inc m | ||
| 85 | test n, n | ||
| 86 | jz xtest | ||
| 87 | xmainloop: | ||
| 88 | mov a, [x+8*i] | ||
| 89 | adc a, [y+8*i] | ||
| 90 | mov [z+8*i],a | ||
| 91 | inc i | ||
| 92 | dec n | ||
| 93 | jnz xmainloop | ||
| 94 | jmp xtest | ||
| 95 | xtoploop: | ||
| 96 | mov a, [x+8*i] | ||
| 97 | adc a, 0 | ||
| 98 | mov [z+8*i],a | ||
| 99 | inc i | ||
| 100 | xtest: | ||
| 101 | dec m | ||
| 102 | jnz xtoploop | ||
| 103 | mov ashort, 0 | ||
| 104 | adc a, 0 | ||
| 105 | test p, p | ||
| 106 | jnz tails | ||
| 107 | #if WINDOWS_ABI | ||
| 108 | pop rsi | ||
| 109 | pop rdi | ||
| 110 | #endif | ||
| 111 | ret | ||
| 112 | |||
| 113 | // The case where y is longer (p >= n > m); p becomes p - n (tail length) and n becomes n - m, the nonzero count of y words beyond x | ||
| 114 | |||
| 115 | ylonger: | ||
| 116 | |||
| 117 | sub p, n | ||
| 118 | sub n, m | ||
| 119 | test m, m | ||
| 120 | jz ytoploop | ||
| 121 | ymainloop: | ||
| 122 | mov a, [x+8*i] | ||
| 123 | adc a, [y+8*i] | ||
| 124 | mov [z+8*i],a | ||
| 125 | inc i | ||
| 126 | dec m | ||
| 127 | jnz ymainloop | ||
| 128 | ytoploop: | ||
| 129 | mov a, [y+8*i] | ||
| 130 | adc a, 0 | ||
| 131 | mov [z+8*i],a | ||
| 132 | inc i | ||
| 133 | dec n | ||
| 134 | jnz ytoploop | ||
| 135 | mov ashort, 0 | ||
| 136 | adc a, 0 | ||
| 137 | test p, p | ||
| 138 | jnz tails | ||
| 139 | #if WINDOWS_ABI | ||
| 140 | pop rsi | ||
| 141 | pop rdi | ||
| 142 | #endif | ||
| 143 | ret | ||
| 144 | |||
| 145 | // Adding a non-trivial tail, when p > max(m,n): store the carry word, then zero-fill the remaining words (the return value is zero on this path) | ||
| 146 | |||
| 147 | tails: | ||
| 148 | mov [z+8*i],a | ||
| 149 | xor a, a | ||
| 150 | jmp tail | ||
| 151 | tailloop: | ||
| 152 | mov [z+8*i],a | ||
| 153 | tail: | ||
| 154 | inc i | ||
| 155 | dec p | ||
| 156 | jnz tailloop | ||
| 157 | #if WINDOWS_ABI | ||
| 158 | pop rsi | ||
| 159 | pop rdi | ||
| 160 | #endif | ||
| 161 | ret | ||
| 162 | |||
| 163 | #if defined(__linux__) && defined(__ELF__) | ||
| 164 | .section .note.GNU-stack,"",%progbits | ||
| 165 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S deleted file mode 100644 index 25ba17bce2..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S +++ /dev/null | |||
| @@ -1,155 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Multiply-add with single-word multiplier, z := z + c * y | ||
| 17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | ||
| 18 | // | ||
| 19 | // extern uint64_t bignum_cmadd | ||
| 20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | ||
| 21 | // | ||
| 22 | // Does the "z := z + c * y" operation where y is n digits and the result z is p digits (p is the size argument called k in the prototype above). | ||
| 23 | // Truncates the result in general. | ||
| 24 | // | ||
| 25 | // The return value is a high/carry word that is meaningful when p = n + 1, or | ||
| 26 | // more generally when n <= p and the result fits in p + 1 digits. In these | ||
| 27 | // cases it gives the top digit of the (p + 1)-digit result. | ||
| 28 | // | ||
| 29 | // Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX | ||
| 30 | // Microsoft x64 ABI: RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX | ||
| 31 | // ---------------------------------------------------------------------------- | ||
| 32 | |||
| 33 | #include "s2n_bignum_internal.h" | ||
| 34 | |||
| 35 | .intel_syntax noprefix | ||
| 36 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmadd) | ||
| 37 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmadd) | ||
| 38 | .text | ||
| 39 | |||
| 40 | #define p rdi | ||
| 41 | #define z rsi | ||
| 42 | #define c r9 | ||
| 43 | #define n rcx | ||
| 44 | #define x r8 | ||
| 45 | |||
| 46 | #define i r10 | ||
| 47 | #define h r11 | ||
| 48 | |||
| 49 | #define r rbx | ||
| 50 | |||
| 51 | #define hshort r11d | ||
| 52 | #define ishort r10d | ||
| 53 | |||
| 54 | |||
| 55 | |||
| 56 | S2N_BN_SYMBOL(bignum_cmadd): | ||
| 57 | _CET_ENDBR | ||
| 58 | |||
| 59 | #if WINDOWS_ABI | ||
| 60 | push rdi | ||
| 61 | push rsi | ||
| 62 | mov rdi, rcx | ||
| 63 | mov rsi, rdx | ||
| 64 | mov rdx, r8 | ||
| 65 | mov rcx, r9 | ||
| 66 | mov r8, [rsp+56] | ||
| 67 | #endif | ||
| 68 | |||
| 69 | // Seems hard to avoid one more register: save rbx, used below as the scratch r | ||
| 70 | |||
| 71 | push rbx | ||
| 72 | |||
| 73 | // First clamp the input size n := min(p,n) since we can never need to read | ||
| 74 | // past the p'th term of the input to generate p-digit output. | ||
| 75 | // Subtract p := p - min(n,p) so it holds the size of the extra tail needed | ||
| 76 | |||
| 77 | cmp p, n | ||
| 78 | cmovc n, p | ||
| 79 | sub p, n | ||
| 80 | |||
| 81 | // Initialize high part h = 0; if n = 0 do nothing but return that zero | ||
| 82 | |||
| 83 | xor h, h | ||
| 84 | test n, n | ||
| 85 | jz end | ||
| 86 | |||
| 87 | // Move c into a safer register as multiplies overwrite rdx | ||
| 88 | |||
| 89 | mov c, rdx | ||
| 90 | |||
| 91 | // Initialization of the loop: 2^64 * CF + [h,z_0'] = z_0 + c * x_0 | ||
| 92 | |||
| 93 | mov rax, [x] | ||
| 94 | mul c | ||
| 95 | add [z], rax | ||
| 96 | mov h, rdx | ||
| 97 | mov ishort, 1 | ||
| 98 | dec n | ||
| 99 | jz hightail | ||
| 100 | |||
| 101 | // Main loop, where we always have CF + previous high part h to add in; sbb sets r = -CF so that "sub rdx, r" folds that carry into the new high word | ||
| 102 | |||
| 103 | loop: | ||
| 104 | adc h, [z+8*i] | ||
| 105 | sbb r, r | ||
| 106 | mov rax, [x+8*i] | ||
| 107 | mul c | ||
| 108 | sub rdx, r | ||
| 109 | add rax, h | ||
| 110 | mov [z+8*i], rax | ||
| 111 | mov h, rdx | ||
| 112 | inc i | ||
| 113 | dec n | ||
| 114 | jnz loop | ||
| 115 | |||
| 116 | hightail: | ||
| 117 | adc h, 0 | ||
| 118 | |||
| 119 | // Propagate the carry all the way to the end with h as extra carry word | ||
| 120 | |||
| 121 | tail: | ||
| 122 | test p, p | ||
| 123 | jz end | ||
| 124 | |||
| 125 | add [z+8*i], h | ||
| 126 | mov hshort, 0 | ||
| 127 | inc i | ||
| 128 | dec p | ||
| 129 | jz highend | ||
| 130 | |||
| 131 | tloop: | ||
| 132 | adc [z+8*i], h | ||
| 133 | inc i | ||
| 134 | dec p | ||
| 135 | jnz tloop | ||
| 136 | |||
| 137 | highend: | ||
| 138 | |||
| 139 | adc h, 0 | ||
| 140 | |||
| 141 | // Return the high/carry word h in rax | ||
| 142 | |||
| 143 | end: | ||
| 144 | mov rax, h | ||
| 145 | |||
| 146 | pop rbx | ||
| 147 | #if WINDOWS_ABI | ||
| 148 | pop rsi | ||
| 149 | pop rdi | ||
| 150 | #endif | ||
| 151 | ret | ||
| 152 | |||
| 153 | #if defined(__linux__) && defined(__ELF__) | ||
| 154 | .section .note.GNU-stack,"",%progbits | ||
| 155 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S deleted file mode 100644 index 12f785d63a..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S +++ /dev/null | |||
| @@ -1,138 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Multiply by a single word, z := c * y | ||
| 17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | ||
| 18 | // | ||
| 19 | // extern uint64_t bignum_cmul | ||
| 20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | ||
| 21 | // | ||
| 22 | // Does the "z := c * y" operation where y is n digits and the result z is p digits (p is the size argument called k in the prototype above). | ||
| 23 | // Truncates the result in general unless p >= n + 1. | ||
| 24 | // | ||
| 25 | // The return value is a high/carry word that is meaningful when p >= n as | ||
| 26 | // giving the high part of the result. Since this is always zero if p > n, | ||
| 27 | // it is mainly of interest in the special case p = n, i.e. where the source | ||
| 28 | // and destination have the same nominal size, when it gives the extra word | ||
| 29 | // of the full result. | ||
| 30 | // | ||
| 31 | // Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX | ||
| 32 | // Microsoft x64 ABI: RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX | ||
| 33 | // ---------------------------------------------------------------------------- | ||
| 34 | |||
| 35 | #include "s2n_bignum_internal.h" | ||
| 36 | |||
| 37 | .intel_syntax noprefix | ||
| 38 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul) | ||
| 39 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul) | ||
| 40 | .text | ||
| 41 | |||
| 42 | #define p rdi | ||
| 43 | #define z rsi | ||
| 44 | #define c r9 | ||
| 45 | #define n rcx | ||
| 46 | #define x r8 | ||
| 47 | |||
| 48 | #define i r10 | ||
| 49 | #define h r11 | ||
| 50 | |||
| 51 | |||
| 52 | |||
| 53 | S2N_BN_SYMBOL(bignum_cmul): | ||
| 54 | _CET_ENDBR | ||
| 55 | |||
| 56 | #if WINDOWS_ABI | ||
| 57 | push rdi | ||
| 58 | push rsi | ||
| 59 | mov rdi, rcx | ||
| 60 | mov rsi, rdx | ||
| 61 | mov rdx, r8 | ||
| 62 | mov rcx, r9 | ||
| 63 | mov r8, [rsp+56] | ||
| 64 | #endif | ||
| 65 | |||
| 66 | // First clamp the input size n := min(p,n) since we can never need to read | ||
| 67 | // past the p'th term of the input to generate p-digit output. Now we can | ||
| 68 | // assume that n <= p | ||
| 69 | |||
| 70 | cmp p, n | ||
| 71 | cmovc n, p | ||
| 72 | |||
| 73 | // Initialize current input/output pointer offset i and high part h. | ||
| 74 | // But then if n = 0 skip the multiplication and go to the tail part | ||
| 75 | |||
| 76 | xor h, h | ||
| 77 | xor i, i | ||
| 78 | test n, n | ||
| 79 | jz tail | ||
| 80 | |||
| 81 | // Move c into a safer register as multiplies overwrite rdx | ||
| 82 | |||
| 83 | mov c, rdx | ||
| 84 | |||
| 85 | // Initialization of the loop: [h,z_0] = c * x_0 | ||
| 86 | |||
| 87 | mov rax, [x] | ||
| 88 | mul c | ||
| 89 | mov [z], rax | ||
| 90 | mov h, rdx | ||
| 91 | inc i | ||
| 92 | cmp i, n | ||
| 93 | jz tail | ||
| 94 | |||
| 95 | // Main loop doing the multiplications: [h,z_i] = c * x_i + h while i < n | ||
| 96 | |||
| 97 | loop: | ||
| 98 | mov rax, [x+8*i] | ||
| 99 | mul c | ||
| 100 | add rax, h | ||
| 101 | adc rdx, 0 | ||
| 102 | mov [z+8*i], rax | ||
| 103 | mov h, rdx | ||
| 104 | inc i | ||
| 105 | cmp i, n | ||
| 106 | jc loop | ||
| 107 | |||
| 108 | // Add a tail when the destination is longer: store h, then zero-fill the rest (h, and so the return value, is zero on that path) | ||
| 109 | |||
| 110 | tail: | ||
| 111 | cmp i, p | ||
| 112 | jnc end | ||
| 113 | mov [z+8*i], h | ||
| 114 | xor h, h | ||
| 115 | inc i | ||
| 116 | cmp i, p | ||
| 117 | jnc end | ||
| 118 | |||
| 119 | tloop: | ||
| 120 | mov [z+8*i], h | ||
| 121 | inc i | ||
| 122 | cmp i, p | ||
| 123 | jc tloop | ||
| 124 | |||
| 125 | // Return the high/carry word h in rax | ||
| 126 | |||
| 127 | end: | ||
| 128 | mov rax, h | ||
| 129 | |||
| 130 | #if WINDOWS_ABI | ||
| 131 | pop rsi | ||
| 132 | pop rdi | ||
| 133 | #endif | ||
| 134 | ret | ||
| 135 | |||
| 136 | #if defined(__linux__) && defined(__ELF__) | ||
| 137 | .section .note.GNU-stack,"",%progbits | ||
| 138 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S deleted file mode 100644 index a3552679a2..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S +++ /dev/null | |||
| @@ -1,167 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Multiply z := x * y | ||
| 17 | // Inputs x[m], y[n]; output z[k] | ||
| 18 | // | ||
| 19 | // extern void bignum_mul | ||
| 20 | // (uint64_t k, uint64_t *z, | ||
| 21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
| 22 | // | ||
| 23 | // Does the "z := x * y" operation where x is m digits, y is n, result z is k. | ||
| 24 | // Truncates the result in general unless k >= m + n | ||
| 25 | // | ||
| 26 | // Standard x86-64 ABI: RDI = k, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y | ||
| 27 | // Microsoft x64 ABI: RCX = k, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y | ||
| 28 | // ---------------------------------------------------------------------------- | ||
| 29 | |||
| 30 | #include "s2n_bignum_internal.h" | ||
| 31 | |||
| 32 | .intel_syntax noprefix | ||
| 33 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul) | ||
| 34 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul) | ||
| 35 | .text | ||
| 36 | |||
| 37 | // These aliases name the registers the arguments arrive in (standard ABI) | ||
| 38 | |||
| 39 | #define p rdi | ||
| 40 | #define z rsi | ||
| 41 | #define n r8 | ||
| 42 | |||
| 43 | // These are working registers, not argument registers (m is copied in from rdx; x and y are derived from rcx and r9) | ||
| 44 | |||
| 45 | #define c r15 | ||
| 46 | #define h r14 | ||
| 47 | #define l r13 | ||
| 48 | #define x r12 | ||
| 49 | #define y r11 | ||
| 50 | #define i rbx | ||
| 51 | #define k r10 | ||
| 52 | #define m rbp | ||
| 53 | |||
| 54 | // These are always local scratch since multiplier result is in these | ||
| 55 | |||
| 56 | #define a rax | ||
| 57 | #define d rdx | ||
| 58 | |||
| 59 | |||
| 60 | |||
| 61 | S2N_BN_SYMBOL(bignum_mul): | ||
| 62 | _CET_ENDBR | ||
| 63 | |||
| 64 | #if WINDOWS_ABI | ||
| 65 | push rdi | ||
| 66 | push rsi | ||
| 67 | mov rdi, rcx | ||
| 68 | mov rsi, rdx | ||
| 69 | mov rdx, r8 | ||
| 70 | mov rcx, r9 | ||
| 71 | mov r8, [rsp+56] | ||
| 72 | mov r9, [rsp+64] | ||
| 73 | #endif | ||
| 74 | |||
| 75 | // We use too many registers, and also we need rax:rdx for multiplications, so save the callee-saved registers used below and copy m out of rdx | ||
| 76 | |||
| 77 | push rbx | ||
| 78 | push rbp | ||
| 79 | push r12 | ||
| 80 | push r13 | ||
| 81 | push r14 | ||
| 82 | push r15 | ||
| 83 | mov m, rdx | ||
| 84 | |||
| 85 | // If the result size is zero, do nothing | ||
| 86 | // Note that even if either or both inputs has size zero, we can't | ||
| 87 | // just give up because we at least need to zero the output array | ||
| 88 | // If we did a multiply-add variant, however, then we could | ||
| 89 | |||
| 90 | test p, p | ||
| 91 | jz end | ||
| 92 | |||
| 93 | // Set initial 2-part sum to zero (we zero c inside the body) | ||
| 94 | |||
| 95 | xor h,h | ||
| 96 | xor l,l | ||
| 97 | |||
| 98 | // Otherwise do outer loop k = 0 ... k = p - 1 | ||
| 99 | |||
| 100 | xor k, k | ||
| 101 | |||
| 102 | outerloop: | ||
| 103 | |||
| 104 | // Zero our carry term first; we eventually want it and a zero is useful now | ||
| 105 | // Set a = max 0 (k + 1 - n), i = min (k + 1) m | ||
| 106 | // This defines the range a <= j < i for the inner summation | ||
| 107 | // Note that since k < p < 2^64 we can assume k + 1 doesn't overflow | ||
| 108 | // And since we want to increment it anyway, we might as well do it now | ||
| 109 | |||
| 110 | xor c, c // c = 0 | ||
| 111 | inc k // k = k + 1 | ||
| 112 | |||
| 113 | mov a, k // a = k + 1 | ||
| 114 | sub a, n // a = k + 1 - n | ||
| 115 | cmovc a, c // a = max 0 (k + 1 - n) | ||
| 116 | |||
| 117 | mov i, m // i = m | ||
| 118 | cmp k, m // CF <=> k + 1 < m | ||
| 119 | cmovc i, k // i = min (k + 1) m | ||
| 120 | |||
| 121 | // Turn i into a loop count, and skip things if it's <= 0 | ||
| 122 | // Otherwise set up initial pointers x -> x0[a] and y -> y0[k - a] | ||
| 123 | // and then launch into the main inner loop, postdecrementing i | ||
| 124 | |||
| 125 | mov d, k | ||
| 126 | sub d, i | ||
| 127 | sub i, a | ||
| 128 | jbe innerend | ||
| 129 | lea x,[rcx+8*a] | ||
| 130 | lea y,[r9+8*d-8] | ||
| 131 | |||
| 132 | innerloop: | ||
| 133 | mov rax, [y+8*i] | ||
| 134 | mul QWORD PTR [x] | ||
| 135 | add x, 8 | ||
| 136 | add l, rax | ||
| 137 | adc h, rdx | ||
| 138 | adc c, 0 | ||
| 139 | dec i | ||
| 140 | jnz innerloop | ||
| 141 | |||
| 142 | innerend: | ||
| 143 | |||
| 144 | mov [z], l | ||
| 145 | mov l, h | ||
| 146 | mov h, c | ||
| 147 | add z, 8 | ||
| 148 | |||
| 149 | cmp k, p | ||
| 150 | jc outerloop | ||
| 151 | |||
| 152 | end: | ||
| 153 | pop r15 | ||
| 154 | pop r14 | ||
| 155 | pop r13 | ||
| 156 | pop r12 | ||
| 157 | pop rbp | ||
| 158 | pop rbx | ||
| 159 | #if WINDOWS_ABI | ||
| 160 | pop rsi | ||
| 161 | pop rdi | ||
| 162 | #endif | ||
| 163 | ret | ||
| 164 | |||
| 165 | #if defined(__linux__) && defined(__ELF__) | ||
| 166 | .section .note.GNU-stack,"",%progbits | ||
| 167 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S deleted file mode 100644 index 70ff69e372..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S +++ /dev/null | |||
| @@ -1,157 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Multiply z := x * y | ||
| 17 | // Inputs x[4], y[4]; output z[8] | ||
| 18 | // | ||
| 19 | // extern void bignum_mul_4_8_alt | ||
| 20 | // (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); | ||
| 21 | // | ||
| 22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | ||
| 23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | ||
| 24 | // ---------------------------------------------------------------------------- | ||
| 25 | |||
| 26 | #include "s2n_bignum_internal.h" | ||
| 27 | |||
| 28 | .intel_syntax noprefix | ||
| 29 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_4_8_alt) | ||
| 30 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_4_8_alt) | ||
| 31 | .text | ||
| 32 | |||
| 33 | // These aliases name the registers the arguments arrive in (standard ABI) | ||
| 34 | |||
| 35 | #define z rdi | ||
| 36 | #define x rsi | ||
| 37 | |||
| 38 | // This is moved from rdx to free it for muls | ||
| 39 | |||
| 40 | #define y rcx | ||
| 41 | |||
| 42 | // Other variables used as a rotating 3-word window to add terms to | ||
| 43 | |||
| 44 | #define t0 r8 | ||
| 45 | #define t1 r9 | ||
| 46 | #define t2 r10 | ||
| 47 | |||
| 48 | // Macro for the key "multiply and add to (c,h,l)" step; uses rax and rdx as scratch | ||
| 49 | |||
| 50 | #define combadd(c,h,l,numa,numb) \ | ||
| 51 | mov rax, numa; \ | ||
| 52 | mul QWORD PTR numb; \ | ||
| 53 | add l, rax; \ | ||
| 54 | adc h, rdx; \ | ||
| 55 | adc c, 0 | ||
| 56 | |||
| 57 | // A minutely shorter form for when c = 0 initially (so "adc c, c" just leaves the carry in c) | ||
| 58 | |||
| 59 | #define combadz(c,h,l,numa,numb) \ | ||
| 60 | mov rax, numa; \ | ||
| 61 | mul QWORD PTR numb; \ | ||
| 62 | add l, rax; \ | ||
| 63 | adc h, rdx; \ | ||
| 64 | adc c, c | ||
| 65 | |||
| 66 | // A short form where we don't expect a top carry, so only (h,l) is updated | ||
| 67 | |||
| 68 | #define combads(h,l,numa,numb) \ | ||
| 69 | mov rax, numa; \ | ||
| 70 | mul QWORD PTR numb; \ | ||
| 71 | add l, rax; \ | ||
| 72 | adc h, rdx | ||
| 73 | |||
| 74 | S2N_BN_SYMBOL(bignum_mul_4_8_alt): | ||
| 75 | _CET_ENDBR | ||
| 76 | |||
| 77 | #if WINDOWS_ABI | ||
| 78 | push rdi | ||
| 79 | push rsi | ||
| 80 | mov rdi, rcx | ||
| 81 | mov rsi, rdx | ||
| 82 | mov rdx, r8 | ||
| 83 | #endif | ||
| 84 | |||
| 85 | // Copy y into a safe register to start with | ||
| 86 | |||
| 87 | mov y, rdx | ||
| 88 | |||
| 89 | // Result term 0 | ||
| 90 | |||
| 91 | mov rax, [x] | ||
| 92 | mul QWORD PTR [y] | ||
| 93 | |||
| 94 | mov [z], rax | ||
| 95 | mov t0, rdx | ||
| 96 | xor t1, t1 | ||
| 97 | |||
| 98 | // Result term 1 | ||
| 99 | |||
| 100 | xor t2, t2 | ||
| 101 | combads(t1,t0,[x],[y+8]) | ||
| 102 | combadz(t2,t1,t0,[x+8],[y]) | ||
| 103 | mov [z+8], t0 | ||
| 104 | |||
| 105 | // Result term 2 | ||
| 106 | |||
| 107 | xor t0, t0 | ||
| 108 | combadz(t0,t2,t1,[x],[y+16]) | ||
| 109 | combadd(t0,t2,t1,[x+8],[y+8]) | ||
| 110 | combadd(t0,t2,t1,[x+16],[y]) | ||
| 111 | mov [z+16], t1 | ||
| 112 | |||
| 113 | // Result term 3 | ||
| 114 | |||
| 115 | xor t1, t1 | ||
| 116 | combadz(t1,t0,t2,[x],[y+24]) | ||
| 117 | combadd(t1,t0,t2,[x+8],[y+16]) | ||
| 118 | combadd(t1,t0,t2,[x+16],[y+8]) | ||
| 119 | combadd(t1,t0,t2,[x+24],[y]) | ||
| 120 | mov [z+24], t2 | ||
| 121 | |||
| 122 | // Result term 4 | ||
| 123 | |||
| 124 | xor t2, t2 | ||
| 125 | combadz(t2,t1,t0,[x+8],[y+24]) | ||
| 126 | combadd(t2,t1,t0,[x+16],[y+16]) | ||
| 127 | combadd(t2,t1,t0,[x+24],[y+8]) | ||
| 128 | mov [z+32], t0 | ||
| 129 | |||
| 130 | // Result term 5 | ||
| 131 | |||
| 132 | xor t0, t0 | ||
| 133 | combadz(t0,t2,t1,[x+16],[y+24]) | ||
| 134 | combadd(t0,t2,t1,[x+24],[y+16]) | ||
| 135 | mov [z+40], t1 | ||
| 136 | |||
| 137 | // Result term 6 (NOTE(review): t1 is zeroed here but no later instruction in this routine reads it) | ||
| 138 | |||
| 139 | xor t1, t1 | ||
| 140 | combads(t0,t2,[x+24],[y+24]) | ||
| 141 | mov [z+48], t2 | ||
| 142 | |||
| 143 | // Result term 7 | ||
| 144 | |||
| 145 | mov [z+56], t0 | ||
| 146 | |||
| 147 | // Return | ||
| 148 | |||
| 149 | #if WINDOWS_ABI | ||
| 150 | pop rsi | ||
| 151 | pop rdi | ||
| 152 | #endif | ||
| 153 | ret | ||
| 154 | |||
| 155 | #if defined(__linux__) && defined(__ELF__) | ||
| 156 | .section .note.GNU-stack,"",%progbits | ||
| 157 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S deleted file mode 100644 index 066403b074..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S +++ /dev/null | |||
| @@ -1,244 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Multiply z := x * y | ||
| 17 | // Inputs x[8], y[8]; output z[16] | ||
| 18 | // | ||
| 19 | // extern void bignum_mul_8_16_alt | ||
| 20 | // (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); | ||
| 21 | // | ||
| 22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | ||
| 23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | ||
| 24 | // ---------------------------------------------------------------------------- | ||
| 25 | |||
| 26 | #include "s2n_bignum_internal.h" | ||
| 27 | |||
| 28 | .intel_syntax noprefix | ||
| 29 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_8_16_alt) | ||
| 30 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_8_16_alt) | ||
| 31 | .text | ||
| 32 | |||
| 33 | // These are actually right | ||
| 34 | |||
| 35 | #define z rdi | ||
| 36 | #define x rsi | ||
| 37 | |||
| 38 | // This is moved from rdx to free it for muls | ||
| 39 | |||
| 40 | #define y rcx | ||
| 41 | |||
| 42 | // Other variables used as a rotating 3-word window to add terms to | ||
| 43 | |||
| 44 | #define t0 r8 | ||
| 45 | #define t1 r9 | ||
| 46 | #define t2 r10 | ||
| 47 | |||
| 48 | // Macro for the key "multiply and add to (c,h,l)" step | ||
| 49 | |||
| 50 | #define combadd(c,h,l,numa,numb) \ | ||
| 51 | mov rax, numa; \ | ||
| 52 | mul QWORD PTR numb; \ | ||
| 53 | add l, rax; \ | ||
| 54 | adc h, rdx; \ | ||
| 55 | adc c, 0 | ||
| 56 | |||
| 57 | // A minutely shorter form for when c = 0 initially | ||
| 58 | |||
| 59 | #define combadz(c,h,l,numa,numb) \ | ||
| 60 | mov rax, numa; \ | ||
| 61 | mul QWORD PTR numb; \ | ||
| 62 | add l, rax; \ | ||
| 63 | adc h, rdx; \ | ||
| 64 | adc c, c | ||
| 65 | |||
| 66 | // A short form where we don't expect a top carry | ||
| 67 | |||
| 68 | #define combads(h,l,numa,numb) \ | ||
| 69 | mov rax, numa; \ | ||
| 70 | mul QWORD PTR numb; \ | ||
| 71 | add l, rax; \ | ||
| 72 | adc h, rdx | ||
| 73 | |||
| 74 | S2N_BN_SYMBOL(bignum_mul_8_16_alt): | ||
| 75 | _CET_ENDBR | ||
| 76 | |||
| 77 | #if WINDOWS_ABI | ||
| 78 | push rdi | ||
| 79 | push rsi | ||
| 80 | mov rdi, rcx | ||
| 81 | mov rsi, rdx | ||
| 82 | mov rdx, r8 | ||
| 83 | #endif | ||
| 84 | |||
| 85 | // Copy y into a safe register to start with | ||
| 86 | |||
| 87 | mov y, rdx | ||
| 88 | |||
| 89 | // Result term 0 | ||
| 90 | |||
| 91 | mov rax, [x] | ||
| 92 | mul QWORD PTR [y] | ||
| 93 | |||
| 94 | mov [z], rax | ||
| 95 | mov t0, rdx | ||
| 96 | xor t1, t1 | ||
| 97 | |||
| 98 | // Result term 1 | ||
| 99 | |||
| 100 | xor t2, t2 | ||
| 101 | combads(t1,t0,[x],[y+8]) | ||
| 102 | combadz(t2,t1,t0,[x+8],[y]) | ||
| 103 | mov [z+8], t0 | ||
| 104 | |||
| 105 | // Result term 2 | ||
| 106 | |||
| 107 | xor t0, t0 | ||
| 108 | combadz(t0,t2,t1,[x],[y+16]) | ||
| 109 | combadd(t0,t2,t1,[x+8],[y+8]) | ||
| 110 | combadd(t0,t2,t1,[x+16],[y]) | ||
| 111 | mov [z+16], t1 | ||
| 112 | |||
| 113 | // Result term 3 | ||
| 114 | |||
| 115 | xor t1, t1 | ||
| 116 | combadz(t1,t0,t2,[x],[y+24]) | ||
| 117 | combadd(t1,t0,t2,[x+8],[y+16]) | ||
| 118 | combadd(t1,t0,t2,[x+16],[y+8]) | ||
| 119 | combadd(t1,t0,t2,[x+24],[y]) | ||
| 120 | mov [z+24], t2 | ||
| 121 | |||
| 122 | // Result term 4 | ||
| 123 | |||
| 124 | xor t2, t2 | ||
| 125 | combadz(t2,t1,t0,[x],[y+32]) | ||
| 126 | combadd(t2,t1,t0,[x+8],[y+24]) | ||
| 127 | combadd(t2,t1,t0,[x+16],[y+16]) | ||
| 128 | combadd(t2,t1,t0,[x+24],[y+8]) | ||
| 129 | combadd(t2,t1,t0,[x+32],[y]) | ||
| 130 | mov [z+32], t0 | ||
| 131 | |||
| 132 | // Result term 5 | ||
| 133 | |||
| 134 | xor t0, t0 | ||
| 135 | combadz(t0,t2,t1,[x],[y+40]) | ||
| 136 | combadd(t0,t2,t1,[x+8],[y+32]) | ||
| 137 | combadd(t0,t2,t1,[x+16],[y+24]) | ||
| 138 | combadd(t0,t2,t1,[x+24],[y+16]) | ||
| 139 | combadd(t0,t2,t1,[x+32],[y+8]) | ||
| 140 | combadd(t0,t2,t1,[x+40],[y]) | ||
| 141 | mov [z+40], t1 | ||
| 142 | |||
| 143 | // Result term 6 | ||
| 144 | |||
| 145 | xor t1, t1 | ||
| 146 | combadz(t1,t0,t2,[x],[y+48]) | ||
| 147 | combadd(t1,t0,t2,[x+8],[y+40]) | ||
| 148 | combadd(t1,t0,t2,[x+16],[y+32]) | ||
| 149 | combadd(t1,t0,t2,[x+24],[y+24]) | ||
| 150 | combadd(t1,t0,t2,[x+32],[y+16]) | ||
| 151 | combadd(t1,t0,t2,[x+40],[y+8]) | ||
| 152 | combadd(t1,t0,t2,[x+48],[y]) | ||
| 153 | mov [z+48], t2 | ||
| 154 | |||
| 155 | // Result term 7 | ||
| 156 | |||
| 157 | xor t2, t2 | ||
| 158 | combadz(t2,t1,t0,[x],[y+56]) | ||
| 159 | combadd(t2,t1,t0,[x+8],[y+48]) | ||
| 160 | combadd(t2,t1,t0,[x+16],[y+40]) | ||
| 161 | combadd(t2,t1,t0,[x+24],[y+32]) | ||
| 162 | combadd(t2,t1,t0,[x+32],[y+24]) | ||
| 163 | combadd(t2,t1,t0,[x+40],[y+16]) | ||
| 164 | combadd(t2,t1,t0,[x+48],[y+8]) | ||
| 165 | combadd(t2,t1,t0,[x+56],[y]) | ||
| 166 | mov [z+56], t0 | ||
| 167 | |||
| 168 | // Result term 8 | ||
| 169 | |||
| 170 | xor t0, t0 | ||
| 171 | combadz(t0,t2,t1,[x+8],[y+56]) | ||
| 172 | combadd(t0,t2,t1,[x+16],[y+48]) | ||
| 173 | combadd(t0,t2,t1,[x+24],[y+40]) | ||
| 174 | combadd(t0,t2,t1,[x+32],[y+32]) | ||
| 175 | combadd(t0,t2,t1,[x+40],[y+24]) | ||
| 176 | combadd(t0,t2,t1,[x+48],[y+16]) | ||
| 177 | combadd(t0,t2,t1,[x+56],[y+8]) | ||
| 178 | mov [z+64], t1 | ||
| 179 | |||
| 180 | // Result term 9 | ||
| 181 | |||
| 182 | xor t1, t1 | ||
| 183 | combadz(t1,t0,t2,[x+16],[y+56]) | ||
| 184 | combadd(t1,t0,t2,[x+24],[y+48]) | ||
| 185 | combadd(t1,t0,t2,[x+32],[y+40]) | ||
| 186 | combadd(t1,t0,t2,[x+40],[y+32]) | ||
| 187 | combadd(t1,t0,t2,[x+48],[y+24]) | ||
| 188 | combadd(t1,t0,t2,[x+56],[y+16]) | ||
| 189 | mov [z+72], t2 | ||
| 190 | |||
| 191 | // Result term 10 | ||
| 192 | |||
| 193 | xor t2, t2 | ||
| 194 | combadz(t2,t1,t0,[x+24],[y+56]) | ||
| 195 | combadd(t2,t1,t0,[x+32],[y+48]) | ||
| 196 | combadd(t2,t1,t0,[x+40],[y+40]) | ||
| 197 | combadd(t2,t1,t0,[x+48],[y+32]) | ||
| 198 | combadd(t2,t1,t0,[x+56],[y+24]) | ||
| 199 | mov [z+80], t0 | ||
| 200 | |||
| 201 | // Result term 11 | ||
| 202 | |||
| 203 | xor t0, t0 | ||
| 204 | combadz(t0,t2,t1,[x+32],[y+56]) | ||
| 205 | combadd(t0,t2,t1,[x+40],[y+48]) | ||
| 206 | combadd(t0,t2,t1,[x+48],[y+40]) | ||
| 207 | combadd(t0,t2,t1,[x+56],[y+32]) | ||
| 208 | mov [z+88], t1 | ||
| 209 | |||
| 210 | // Result term 12 | ||
| 211 | |||
| 212 | xor t1, t1 | ||
| 213 | combadz(t1,t0,t2,[x+40],[y+56]) | ||
| 214 | combadd(t1,t0,t2,[x+48],[y+48]) | ||
| 215 | combadd(t1,t0,t2,[x+56],[y+40]) | ||
| 216 | mov [z+96], t2 | ||
| 217 | |||
| 218 | // Result term 13 | ||
| 219 | |||
| 220 | xor t2, t2 | ||
| 221 | combadz(t2,t1,t0,[x+48],[y+56]) | ||
| 222 | combadd(t2,t1,t0,[x+56],[y+48]) | ||
| 223 | mov [z+104], t0 | ||
| 224 | |||
| 225 | // Result term 14 | ||
| 226 | |||
| 227 | combads(t2,t1,[x+56],[y+56]) | ||
| 228 | mov [z+112], t1 | ||
| 229 | |||
| 230 | // Result term 15 | ||
| 231 | |||
| 232 | mov [z+120], t2 | ||
| 233 | |||
| 234 | // Return | ||
| 235 | |||
| 236 | #if WINDOWS_ABI | ||
| 237 | pop rsi | ||
| 238 | pop rdi | ||
| 239 | #endif | ||
| 240 | ret | ||
| 241 | |||
| 242 | #if defined(__linux__) && defined(__ELF__) | ||
| 243 | .section .note.GNU-stack,"",%progbits | ||
| 244 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S deleted file mode 100644 index 54e3f59442..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S +++ /dev/null | |||
| @@ -1,197 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Square z := x^2 | ||
| 17 | // Input x[n]; output z[k] | ||
| 18 | // | ||
| 19 | // extern void bignum_sqr | ||
| 20 | // (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); | ||
| 21 | // | ||
| 22 | // Does the "z := x^2" operation where x is n digits and result z is k. | ||
| 23 | // Truncates the result in general unless k >= 2 * n | ||
| 24 | // | ||
| 25 | // Standard x86-64 ABI: RDI = k, RSI = z, RDX = n, RCX = x | ||
| 26 | // Microsoft x64 ABI: RCX = k, RDX = z, R8 = n, R9 = x | ||
| 27 | // ---------------------------------------------------------------------------- | ||
| 28 | |||
| 29 | #include "s2n_bignum_internal.h" | ||
| 30 | |||
| 31 | .intel_syntax noprefix | ||
| 32 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr) | ||
| 33 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr) | ||
| 34 | .text | ||
| 35 | |||
| 36 | // p (the "k" argument), z and x stay in their argument registers; n is moved out of rdx. | ||
| 37 | |||
| 38 | #define p rdi | ||
| 39 | #define z rsi | ||
| 40 | #define x rcx | ||
| 41 | #define n r8 | ||
| 42 | |||
| 43 | // These are always local scratch since multiplier result is in these | ||
| 44 | |||
| 45 | #define a rax | ||
| 46 | #define d rdx | ||
| 47 | |||
| 48 | // Other variables | ||
| 49 | |||
| 50 | #define i rbx | ||
| 51 | #define ll rbp | ||
| 52 | #define hh r9 | ||
| 53 | #define k r10 | ||
| 54 | #define y r11 | ||
| 55 | #define htop r12 | ||
| 56 | #define l r13 | ||
| 57 | #define h r14 | ||
| 58 | #define c r15 | ||
| 59 | |||
| 60 | // Short versions | ||
| 61 | |||
| 62 | #define llshort ebp | ||
| 63 | |||
| 64 | S2N_BN_SYMBOL(bignum_sqr): | ||
| 65 | _CET_ENDBR | ||
| 66 | |||
| 67 | #if WINDOWS_ABI | ||
| 68 | push rdi | ||
| 69 | push rsi | ||
| 70 | mov rdi, rcx | ||
| 71 | mov rsi, rdx | ||
| 72 | mov rdx, r8 | ||
| 73 | mov rcx, r9 | ||
| 74 | #endif | ||
| 75 | |||
| 76 | // We use too many registers, and also we need rax:rdx for multiplications | ||
| 77 | |||
| 78 | push rbx | ||
| 79 | push rbp | ||
| 80 | push r12 | ||
| 81 | push r13 | ||
| 82 | push r14 | ||
| 83 | push r15 | ||
| 84 | mov n, rdx | ||
| 85 | |||
| 86 | // If p = 0 the result is trivial and nothing needs doing | ||
| 87 | |||
| 88 | test p, p | ||
| 89 | jz end | ||
| 90 | |||
| 91 | // initialize (hh,ll) = 0 | ||
| 92 | |||
| 93 | xor llshort, llshort | ||
| 94 | xor hh, hh | ||
| 95 | |||
| 96 | // Iterate outer loop from k = 0 ... k = p - 1 producing result digits | ||
| 97 | |||
| 98 | xor k, k | ||
| 99 | |||
| 100 | outerloop: | ||
| 101 | |||
| 102 | // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n | ||
| 103 | // We want to accumulate all x[i] * x[k - i] for bot <= i < top | ||
| 104 | // For the optimization of squaring we avoid duplication and do | ||
| 105 | // 2 * x[i] * x[k - i] for i < htop, where htop = MIN ((k+1)/2) n | ||
| 106 | // Initialize i = bot; in fact just compute bot as i directly. | ||
| 107 | |||
| 108 | xor c, c | ||
| 109 | lea i, [k+1] | ||
| 110 | mov htop, i | ||
| 111 | shr htop, 1 | ||
| 112 | sub i, n | ||
| 113 | cmovc i, c | ||
| 114 | cmp htop, n | ||
| 115 | cmovnc htop, n | ||
| 116 | |||
| 117 | // Initialize the three-part local sum (c,h,l); c was already done above | ||
| 118 | |||
| 119 | xor l, l | ||
| 120 | xor h, h | ||
| 121 | |||
| 122 | // If htop <= bot then main doubled part of the sum is empty | ||
| 123 | |||
| 124 | cmp i, htop | ||
| 125 | jnc nosumming | ||
| 126 | |||
| 127 | // Use a moving pointer for [y] = x[k-i] for the cofactor | ||
| 128 | |||
| 129 | mov a, k | ||
| 130 | sub a, i | ||
| 131 | lea y, [x+8*a] | ||
| 132 | |||
| 133 | // Do the main part of the sum x[i] * x[k - i] for 2 * i < k | ||
| 134 | |||
| 135 | innerloop: | ||
| 136 | mov a, [x+8*i] | ||
| 137 | mul QWORD PTR [y] | ||
| 138 | add l, a | ||
| 139 | adc h, d | ||
| 140 | adc c, 0 | ||
| 141 | sub y, 8 | ||
| 142 | inc i | ||
| 143 | cmp i, htop | ||
| 144 | jc innerloop | ||
| 145 | |||
| 146 | // Now double it | ||
| 147 | |||
| 148 | add l, l | ||
| 149 | adc h, h | ||
| 150 | adc c, c | ||
| 151 | |||
| 152 | // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term | ||
| 153 | |||
| 154 | nosumming: | ||
| 155 | test k, 1 | ||
| 156 | jnz innerend | ||
| 157 | cmp i, n | ||
| 158 | jnc innerend | ||
| 159 | |||
| 160 | mov a, [x+8*i] | ||
| 161 | mul a | ||
| 162 | add l, a | ||
| 163 | adc h, d | ||
| 164 | adc c, 0 | ||
| 165 | |||
| 166 | // Now add the local sum into the global sum, store and shift | ||
| 167 | |||
| 168 | innerend: | ||
| 169 | add l, ll | ||
| 170 | mov [z+8*k], l | ||
| 171 | adc h, hh | ||
| 172 | mov ll, h | ||
| 173 | adc c, 0 | ||
| 174 | mov hh, c | ||
| 175 | |||
| 176 | inc k | ||
| 177 | cmp k, p | ||
| 178 | jc outerloop | ||
| 179 | |||
| 180 | // Restore registers and return | ||
| 181 | |||
| 182 | end: | ||
| 183 | pop r15 | ||
| 184 | pop r14 | ||
| 185 | pop r13 | ||
| 186 | pop r12 | ||
| 187 | pop rbp | ||
| 188 | pop rbx | ||
| 189 | #if WINDOWS_ABI | ||
| 190 | pop rsi | ||
| 191 | pop rdi | ||
| 192 | #endif | ||
| 193 | ret | ||
| 194 | |||
| 195 | #if defined(__linux__) && defined(__ELF__) | ||
| 196 | .section .note.GNU-stack,"",%progbits | ||
| 197 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S deleted file mode 100644 index 7c534ae907..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S +++ /dev/null | |||
| @@ -1,145 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Square, z := x^2 | ||
| 17 | // Input x[4]; output z[8] | ||
| 18 | // | ||
| 19 | // extern void bignum_sqr_4_8_alt | ||
| 20 | // (uint64_t z[static 8], uint64_t x[static 4]); | ||
| 21 | // | ||
| 22 | // Standard x86-64 ABI: RDI = z, RSI = x | ||
| 23 | // Microsoft x64 ABI: RCX = z, RDX = x | ||
| 24 | // ---------------------------------------------------------------------------- | ||
| 25 | |||
| 26 | #include "s2n_bignum_internal.h" | ||
| 27 | |||
| 28 | .intel_syntax noprefix | ||
| 29 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_4_8_alt) | ||
| 30 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_4_8_alt) | ||
| 31 | .text | ||
| 32 | |||
| 33 | // Input arguments | ||
| 34 | |||
| 35 | #define z rdi | ||
| 36 | #define x rsi | ||
| 37 | |||
| 38 | // Other variables used as a rotating 3-word window to add terms to | ||
| 39 | |||
| 40 | #define t0 rcx | ||
| 41 | #define t1 r8 | ||
| 42 | #define t2 r9 | ||
| 43 | |||
| 44 | // Macro for the key "multiply and add to (c,h,l)" step, for square term | ||
| 45 | |||
| 46 | #define combadd1(c,h,l,numa) \ | ||
| 47 | mov rax, numa; \ | ||
| 48 | mul rax; \ | ||
| 49 | add l, rax; \ | ||
| 50 | adc h, rdx; \ | ||
| 51 | adc c, 0 | ||
| 52 | |||
| 53 | // A short form where we don't expect a top carry | ||
| 54 | |||
| 55 | #define combads(h,l,numa) \ | ||
| 56 | mov rax, numa; \ | ||
| 57 | mul rax; \ | ||
| 58 | add l, rax; \ | ||
| 59 | adc h, rdx | ||
| 60 | |||
| 61 | // A version doubling before adding, for non-square terms | ||
| 62 | |||
| 63 | #define combadd2(c,h,l,numa,numb) \ | ||
| 64 | mov rax, numa; \ | ||
| 65 | mul QWORD PTR numb; \ | ||
| 66 | add rax, rax; \ | ||
| 67 | adc rdx, rdx; \ | ||
| 68 | adc c, 0; \ | ||
| 69 | add l, rax; \ | ||
| 70 | adc h, rdx; \ | ||
| 71 | adc c, 0 | ||
| 72 | |||
| 73 | S2N_BN_SYMBOL(bignum_sqr_4_8_alt): | ||
| 74 | _CET_ENDBR | ||
| 75 | |||
| 76 | #if WINDOWS_ABI | ||
| 77 | push rdi | ||
| 78 | push rsi | ||
| 79 | mov rdi, rcx | ||
| 80 | mov rsi, rdx | ||
| 81 | #endif | ||
| 82 | |||
| 83 | // Result term 0 | ||
| 84 | |||
| 85 | mov rax, [x] | ||
| 86 | mul rax | ||
| 87 | |||
| 88 | mov [z], rax | ||
| 89 | mov t0, rdx | ||
| 90 | xor t1, t1 | ||
| 91 | |||
| 92 | // Result term 1 | ||
| 93 | |||
| 94 | xor t2, t2 | ||
| 95 | combadd2(t2,t1,t0,[x],[x+8]) | ||
| 96 | mov [z+8], t0 | ||
| 97 | |||
| 98 | // Result term 2 | ||
| 99 | |||
| 100 | xor t0, t0 | ||
| 101 | combadd1(t0,t2,t1,[x+8]) | ||
| 102 | combadd2(t0,t2,t1,[x],[x+16]) | ||
| 103 | mov [z+16], t1 | ||
| 104 | |||
| 105 | // Result term 3 | ||
| 106 | |||
| 107 | xor t1, t1 | ||
| 108 | combadd2(t1,t0,t2,[x],[x+24]) | ||
| 109 | combadd2(t1,t0,t2,[x+8],[x+16]) | ||
| 110 | mov [z+24], t2 | ||
| 111 | |||
| 112 | // Result term 4 | ||
| 113 | |||
| 114 | xor t2, t2 | ||
| 115 | combadd2(t2,t1,t0,[x+8],[x+24]) | ||
| 116 | combadd1(t2,t1,t0,[x+16]) | ||
| 117 | mov [z+32], t0 | ||
| 118 | |||
| 119 | // Result term 5 | ||
| 120 | |||
| 121 | xor t0, t0 | ||
| 122 | combadd2(t0,t2,t1,[x+16],[x+24]) | ||
| 123 | mov [z+40], t1 | ||
| 124 | |||
| 125 | // Result term 6 | ||
| 126 | |||
| 127 | xor t1, t1 | ||
| 128 | combads(t0,t2,[x+24]) | ||
| 129 | mov [z+48], t2 | ||
| 130 | |||
| 131 | // Result term 7 | ||
| 132 | |||
| 133 | mov [z+56], t0 | ||
| 134 | |||
| 135 | // Return | ||
| 136 | |||
| 137 | #if WINDOWS_ABI | ||
| 138 | pop rsi | ||
| 139 | pop rdi | ||
| 140 | #endif | ||
| 141 | ret | ||
| 142 | |||
| 143 | #if defined(__linux__) && defined(__ELF__) | ||
| 144 | .section .note.GNU-stack,"",%progbits | ||
| 145 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S deleted file mode 100644 index ac0b6f96c2..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S +++ /dev/null | |||
| @@ -1,242 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Square, z := x^2 | ||
| 17 | // Input x[8]; output z[16] | ||
| 18 | // | ||
| 19 | // extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); | ||
| 20 | // | ||
| 21 | // Standard x86-64 ABI: RDI = z, RSI = x | ||
| 22 | // Microsoft x64 ABI: RCX = z, RDX = x | ||
| 23 | // ---------------------------------------------------------------------------- | ||
| 24 | |||
| 25 | #include "s2n_bignum_internal.h" | ||
| 26 | |||
| 27 | .intel_syntax noprefix | ||
| 28 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_8_16_alt) | ||
| 29 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_8_16_alt) | ||
| 30 | .text | ||
| 31 | |||
| 32 | // Input arguments | ||
| 33 | |||
| 34 | #define z rdi | ||
| 35 | #define x rsi | ||
| 36 | |||
| 37 | // Other variables used as a rotating 3-word window to add terms to | ||
| 38 | |||
| 39 | #define t0 r8 | ||
| 40 | #define t1 r9 | ||
| 41 | #define t2 r10 | ||
| 42 | |||
| 43 | // Additional temporaries for local windows to share doublings | ||
| 44 | |||
| 45 | #define u0 rcx | ||
| 46 | #define u1 r11 | ||
| 47 | |||
| 48 | // Macro for the key "multiply and add to (c,h,l)" step | ||
| 49 | |||
| 50 | #define combadd(c,h,l,numa,numb) \ | ||
| 51 | mov rax, numa; \ | ||
| 52 | mul QWORD PTR numb; \ | ||
| 53 | add l, rax; \ | ||
| 54 | adc h, rdx; \ | ||
| 55 | adc c, 0 | ||
| 56 | |||
| 57 | // Set up initial window (c,h,l) = numa * numb | ||
| 58 | |||
| 59 | #define combaddz(c,h,l,numa,numb) \ | ||
| 60 | mov rax, numa; \ | ||
| 61 | mul QWORD PTR numb; \ | ||
| 62 | xor c, c; \ | ||
| 63 | mov l, rax; \ | ||
| 64 | mov h, rdx | ||
| 65 | |||
| 66 | // Doubling step (c,h,l) = 2 * (c,hh,ll) + (0,h,l) | ||
| 67 | |||
| 68 | #define doubladd(c,h,l,hh,ll) \ | ||
| 69 | add ll, ll; \ | ||
| 70 | adc hh, hh; \ | ||
| 71 | adc c, c; \ | ||
| 72 | add l, ll; \ | ||
| 73 | adc h, hh; \ | ||
| 74 | adc c, 0 | ||
| 75 | |||
| 76 | // Square term incorporation (c,h,l) += numa^2 | ||
| 77 | |||
| 78 | #define combadd1(c,h,l,numa) \ | ||
| 79 | mov rax, numa; \ | ||
| 80 | mul rax; \ | ||
| 81 | add l, rax; \ | ||
| 82 | adc h, rdx; \ | ||
| 83 | adc c, 0 | ||
| 84 | |||
| 85 | // A short form where we don't expect a top carry | ||
| 86 | |||
| 87 | #define combads(h,l,numa) \ | ||
| 88 | mov rax, numa; \ | ||
| 89 | mul rax; \ | ||
| 90 | add l, rax; \ | ||
| 91 | adc h, rdx | ||
| 92 | |||
| 93 | // A version doubling directly before adding, for single non-square terms | ||
| 94 | |||
| 95 | #define combadd2(c,h,l,numa,numb) \ | ||
| 96 | mov rax, numa; \ | ||
| 97 | mul QWORD PTR numb; \ | ||
| 98 | add rax, rax; \ | ||
| 99 | adc rdx, rdx; \ | ||
| 100 | adc c, 0; \ | ||
| 101 | add l, rax; \ | ||
| 102 | adc h, rdx; \ | ||
| 103 | adc c, 0 | ||
| 104 | |||
| 105 | S2N_BN_SYMBOL(bignum_sqr_8_16_alt): | ||
| 106 | _CET_ENDBR | ||
| 107 | |||
| 108 | #if WINDOWS_ABI | ||
| 109 | push rdi | ||
| 110 | push rsi | ||
| 111 | mov rdi, rcx | ||
| 112 | mov rsi, rdx | ||
| 113 | #endif | ||
| 114 | |||
| 115 | // Result term 0 | ||
| 116 | |||
| 117 | mov rax, [x] | ||
| 118 | mul rax | ||
| 119 | |||
| 120 | mov [z], rax | ||
| 121 | mov t0, rdx | ||
| 122 | xor t1, t1 | ||
| 123 | |||
| 124 | // Result term 1 | ||
| 125 | |||
| 126 | xor t2, t2 | ||
| 127 | combadd2(t2,t1,t0,[x],[x+8]) | ||
| 128 | mov [z+8], t0 | ||
| 129 | |||
| 130 | // Result term 2 | ||
| 131 | |||
| 132 | xor t0, t0 | ||
| 133 | combadd1(t0,t2,t1,[x+8]) | ||
| 134 | combadd2(t0,t2,t1,[x],[x+16]) | ||
| 135 | mov [z+16], t1 | ||
| 136 | |||
| 137 | // Result term 3 | ||
| 138 | |||
| 139 | combaddz(t1,u1,u0,[x],[x+24]) | ||
| 140 | combadd(t1,u1,u0,[x+8],[x+16]) | ||
| 141 | doubladd(t1,t0,t2,u1,u0) | ||
| 142 | mov [z+24], t2 | ||
| 143 | |||
| 144 | // Result term 4 | ||
| 145 | |||
| 146 | combaddz(t2,u1,u0,[x],[x+32]) | ||
| 147 | combadd(t2,u1,u0,[x+8],[x+24]) | ||
| 148 | doubladd(t2,t1,t0,u1,u0) | ||
| 149 | combadd1(t2,t1,t0,[x+16]) | ||
| 150 | mov [z+32], t0 | ||
| 151 | |||
| 152 | // Result term 5 | ||
| 153 | |||
| 154 | combaddz(t0,u1,u0,[x],[x+40]) | ||
| 155 | combadd(t0,u1,u0,[x+8],[x+32]) | ||
| 156 | combadd(t0,u1,u0,[x+16],[x+24]) | ||
| 157 | doubladd(t0,t2,t1,u1,u0) | ||
| 158 | mov [z+40], t1 | ||
| 159 | |||
| 160 | // Result term 6 | ||
| 161 | |||
| 162 | combaddz(t1,u1,u0,[x],[x+48]) | ||
| 163 | combadd(t1,u1,u0,[x+8],[x+40]) | ||
| 164 | combadd(t1,u1,u0,[x+16],[x+32]) | ||
| 165 | doubladd(t1,t0,t2,u1,u0) | ||
| 166 | combadd1(t1,t0,t2,[x+24]) | ||
| 167 | mov [z+48], t2 | ||
| 168 | |||
| 169 | // Result term 7 | ||
| 170 | |||
| 171 | combaddz(t2,u1,u0,[x],[x+56]) | ||
| 172 | combadd(t2,u1,u0,[x+8],[x+48]) | ||
| 173 | combadd(t2,u1,u0,[x+16],[x+40]) | ||
| 174 | combadd(t2,u1,u0,[x+24],[x+32]) | ||
| 175 | doubladd(t2,t1,t0,u1,u0) | ||
| 176 | mov [z+56], t0 | ||
| 177 | |||
| 178 | // Result term 8 | ||
| 179 | |||
| 180 | combaddz(t0,u1,u0,[x+8],[x+56]) | ||
| 181 | combadd(t0,u1,u0,[x+16],[x+48]) | ||
| 182 | combadd(t0,u1,u0,[x+24],[x+40]) | ||
| 183 | doubladd(t0,t2,t1,u1,u0) | ||
| 184 | combadd1(t0,t2,t1,[x+32]) | ||
| 185 | mov [z+64], t1 | ||
| 186 | |||
| 187 | // Result term 9 | ||
| 188 | |||
| 189 | combaddz(t1,u1,u0,[x+16],[x+56]) | ||
| 190 | combadd(t1,u1,u0,[x+24],[x+48]) | ||
| 191 | combadd(t1,u1,u0,[x+32],[x+40]) | ||
| 192 | doubladd(t1,t0,t2,u1,u0) | ||
| 193 | mov [z+72], t2 | ||
| 194 | |||
| 195 | // Result term 10 | ||
| 196 | |||
| 197 | combaddz(t2,u1,u0,[x+24],[x+56]) | ||
| 198 | combadd(t2,u1,u0,[x+32],[x+48]) | ||
| 199 | doubladd(t2,t1,t0,u1,u0) | ||
| 200 | combadd1(t2,t1,t0,[x+40]) | ||
| 201 | mov [z+80], t0 | ||
| 202 | |||
| 203 | // Result term 11 | ||
| 204 | |||
| 205 | combaddz(t0,u1,u0,[x+32],[x+56]) | ||
| 206 | combadd(t0,u1,u0,[x+40],[x+48]) | ||
| 207 | doubladd(t0,t2,t1,u1,u0) | ||
| 208 | mov [z+88], t1 | ||
| 209 | |||
| 210 | // Result term 12 | ||
| 211 | |||
| 212 | xor t1, t1 | ||
| 213 | combadd2(t1,t0,t2,[x+40],[x+56]) | ||
| 214 | combadd1(t1,t0,t2,[x+48]) | ||
| 215 | mov [z+96], t2 | ||
| 216 | |||
| 217 | // Result term 13 | ||
| 218 | |||
| 219 | xor t2, t2 | ||
| 220 | combadd2(t2,t1,t0,[x+48],[x+56]) | ||
| 221 | mov [z+104], t0 | ||
| 222 | |||
| 223 | // Result term 14 | ||
| 224 | |||
| 225 | combads(t2,t1,[x+56]) | ||
| 226 | mov [z+112], t1 | ||
| 227 | |||
| 228 | // Result term 15 | ||
| 229 | |||
| 230 | mov [z+120], t2 | ||
| 231 | |||
| 232 | // Return | ||
| 233 | |||
| 234 | #if WINDOWS_ABI | ||
| 235 | pop rsi | ||
| 236 | pop rdi | ||
| 237 | #endif | ||
| 238 | ret | ||
| 239 | |||
| 240 | #if defined(__linux__) && defined(__ELF__) | ||
| 241 | .section .note.GNU-stack,"",%progbits | ||
| 242 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S deleted file mode 100644 index 3ff8a30510..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S +++ /dev/null | |||
| @@ -1,153 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Subtract, z := x - y | ||
| 17 | // Inputs x[m], y[n]; outputs function return (borrow-out) and z[p] | ||
| 18 | // | ||
| 19 | // extern uint64_t bignum_sub | ||
| 20 | // (uint64_t p, uint64_t *z, | ||
| 21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
| 22 | // | ||
| 23 | // Does the z := x - y operation, truncating modulo p words in general and | ||
| 24 | // returning a top borrow (0 or 1) in the p'th place, only subtracting input | ||
| 25 | // words below p (as well as m and n respectively) to get the diff and borrow. | ||
| 26 | // | ||
| 27 | // Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX | ||
| 28 | // Microsoft x64 ABI: RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX | ||
| 29 | // ---------------------------------------------------------------------------- | ||
| 30 | |||
| 31 | #include "s2n_bignum_internal.h" | ||
| 32 | |||
| 33 | .intel_syntax noprefix | ||
| 34 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub) | ||
| 35 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub) | ||
| 36 | .text | ||
| 37 | |||
| 38 | #define p rdi | ||
| 39 | #define z rsi | ||
| 40 | #define m rdx | ||
| 41 | #define x rcx | ||
| 42 | #define n r8 | ||
| 43 | #define y r9 | ||
| 44 | #define i r10 | ||
| 45 | #define a rax | ||
| 46 | |||
| 47 | #define ashort eax | ||
| 48 | |||
| 49 | |||
| 50 | |||
| 51 | S2N_BN_SYMBOL(bignum_sub): | ||
| 52 | _CET_ENDBR | ||
| 53 | |||
| 54 | #if WINDOWS_ABI | ||
| 55 | push rdi | ||
| 56 | push rsi | ||
| 57 | mov rdi, rcx | ||
| 58 | mov rsi, rdx | ||
| 59 | mov rdx, r8 | ||
| 60 | mov rcx, r9 | ||
| 61 | mov r8, [rsp+56] | ||
| 62 | mov r9, [rsp+64] | ||
| 63 | #endif | ||
| 64 | |||
| 65 | // Zero the main index counter for both branches | ||
| 66 | |||
| 67 | xor i, i | ||
| 68 | |||
| 69 | // First clamp the two input sizes m := min(p,m) and n := min(p,n) since | ||
| 70 | // we'll never need words past the p'th. Can now assume m <= p and n <= p. | ||
| 71 | // Then compare the modified m and n and branch accordingly | ||
| 72 | |||
| 73 | cmp p, m | ||
| 74 | cmovc m, p | ||
| 75 | cmp p, n | ||
| 76 | cmovc n, p | ||
| 77 | cmp m, n | ||
| 78 | jc ylonger | ||
| 79 | |||
| 80 | // The case where x is longer or of the same size (p >= m >= n) | ||
| 81 | |||
| 82 | sub p, m | ||
| 83 | sub m, n | ||
| 84 | inc m | ||
| 85 | test n, n | ||
| 86 | jz xtest | ||
| 87 | xmainloop: | ||
| 88 | mov a, [x+8*i] | ||
| 89 | sbb a, [y+8*i] | ||
| 90 | mov [z+8*i],a | ||
| 91 | inc i | ||
| 92 | dec n | ||
| 93 | jnz xmainloop | ||
| 94 | jmp xtest | ||
| 95 | xtoploop: | ||
| 96 | mov a, [x+8*i] | ||
| 97 | sbb a, 0 | ||
| 98 | mov [z+8*i],a | ||
| 99 | inc i | ||
| 100 | xtest: | ||
| 101 | dec m | ||
| 102 | jnz xtoploop | ||
| 103 | sbb a, a | ||
| 104 | test p, p | ||
| 105 | jz tailskip | ||
| 106 | tailloop: | ||
| 107 | mov [z+8*i],a | ||
| 108 | inc i | ||
| 109 | dec p | ||
| 110 | jnz tailloop | ||
| 111 | tailskip: | ||
| 112 | neg a | ||
| 113 | #if WINDOWS_ABI | ||
| 114 | pop rsi | ||
| 115 | pop rdi | ||
| 116 | #endif | ||
| 117 | ret | ||
| 118 | |||
| 119 | // The case where y is longer (p >= n > m) | ||
| 120 | |||
| 121 | ylonger: | ||
| 122 | |||
| 123 | sub p, n | ||
| 124 | sub n, m | ||
| 125 | test m, m | ||
| 126 | jz ytoploop | ||
| 127 | ymainloop: | ||
| 128 | mov a, [x+8*i] | ||
| 129 | sbb a, [y+8*i] | ||
| 130 | mov [z+8*i],a | ||
| 131 | inc i | ||
| 132 | dec m | ||
| 133 | jnz ymainloop | ||
| 134 | ytoploop: | ||
| 135 | mov ashort, 0 | ||
| 136 | sbb a, [y+8*i] | ||
| 137 | mov [z+8*i],a | ||
| 138 | inc i | ||
| 139 | dec n | ||
| 140 | jnz ytoploop | ||
| 141 | sbb a, a | ||
| 142 | test p, p | ||
| 143 | jnz tailloop | ||
| 144 | neg a | ||
| 145 | #if WINDOWS_ABI | ||
| 146 | pop rsi | ||
| 147 | pop rdi | ||
| 148 | #endif | ||
| 149 | ret | ||
| 150 | |||
| 151 | #if defined(__linux__) && defined(__ELF__) | ||
| 152 | .section .note.GNU-stack,"",%progbits | ||
| 153 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c b/src/lib/libcrypto/bn/arch/amd64/bn_arch.c deleted file mode 100644 index a377a05681..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c +++ /dev/null | |||
| @@ -1,131 +0,0 @@ | |||
| 1 | /* $OpenBSD: bn_arch.c,v 1.7 2023/06/24 16:01:44 jsing Exp $ */ | ||
| 2 | /* | ||
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | ||
| 4 | * | ||
| 5 | * Permission to use, copy, modify, and distribute this software for any | ||
| 6 | * purpose with or without fee is hereby granted, provided that the above | ||
| 7 | * copyright notice and this permission notice appear in all copies. | ||
| 8 | * | ||
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <openssl/bn.h> | ||
| 19 | |||
| 20 | #include "bn_arch.h" | ||
| 21 | #include "bn_local.h" | ||
| 22 | #include "s2n_bignum.h" | ||
| 23 | |||
| 24 | #ifdef HAVE_BN_ADD | ||
| 25 | BN_ULONG | ||
| 26 | bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b, | ||
| 27 | int b_len) | ||
| 28 | { | ||
| 29 | return bignum_add(r_len, (uint64_t *)r, a_len, (uint64_t *)a, | ||
| 30 | b_len, (uint64_t *)b); | ||
| 31 | } | ||
| 32 | #endif | ||
| 33 | |||
| 34 | |||
| 35 | #ifdef HAVE_BN_ADD_WORDS | ||
| 36 | BN_ULONG | ||
| 37 | bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n) | ||
| 38 | { | ||
| 39 | return bignum_add(n, (uint64_t *)rd, n, (uint64_t *)ad, n, | ||
| 40 | (uint64_t *)bd); | ||
| 41 | } | ||
| 42 | #endif | ||
| 43 | |||
#ifdef HAVE_BN_SUB
/*
 * bn_sub() hands the operands to s2n-bignum's bignum_sub(), passing each
 * word array together with its own length (r_len, a_len, b_len), and
 * returns the value bignum_sub() produces.
 *
 * NOTE(review): the returned word is presumably the borrow out of the
 * subtraction; bignum_sub() is implemented in assembly outside this
 * function - confirm there.
 */
BN_ULONG
bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
    int b_len)
{
	uint64_t *dst = (uint64_t *)r;
	uint64_t *lhs = (uint64_t *)a;	/* cast drops const for the callee */
	uint64_t *rhs = (uint64_t *)b;

	return bignum_sub(r_len, dst, a_len, lhs, b_len, rhs);
}
#endif
| 53 | |||
#ifdef HAVE_BN_SUB_WORDS
/*
 * bn_sub_words() is the equal-length form of the subtraction wrapper: the
 * same word count n is passed to bignum_sub() for the result rd and for
 * both inputs ad and bd. The value returned by bignum_sub() is passed
 * straight back to the caller.
 *
 * NOTE(review): presumably that value is the borrow - confirm against
 * bignum_sub().
 */
BN_ULONG
bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
{
	uint64_t *dst = (uint64_t *)rd;
	uint64_t *lhs = (uint64_t *)ad;
	uint64_t *rhs = (uint64_t *)bd;

	return bignum_sub(n, dst, n, lhs, n, rhs);
}
#endif
| 62 | |||
#ifdef HAVE_BN_MUL_ADD_WORDS
/*
 * bn_mul_add_words() forwards to s2n-bignum's bignum_cmadd() with rd as
 * the num-word destination, w as the single-word multiplier and ad as the
 * num-word source, and returns bignum_cmadd()'s result.
 *
 * NOTE(review): presumably this computes rd += ad * w and returns the
 * carry word; bignum_cmadd() lives in assembly outside this function -
 * confirm there.
 */
BN_ULONG
bn_mul_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
{
	uint64_t *acc = (uint64_t *)rd;
	uint64_t *src = (uint64_t *)ad;

	return bignum_cmadd(num, acc, w, num, src);
}
#endif
| 70 | |||
#ifdef HAVE_BN_MUL_WORDS
/*
 * bn_mul_words() forwards to s2n-bignum's bignum_cmul() with rd as the
 * num-word destination, w as the single-word multiplier and ad as the
 * num-word source, and returns bignum_cmul()'s result.
 *
 * NOTE(review): presumably this computes rd = ad * w and returns the
 * high (carry) word; bignum_cmul() lives in assembly outside this
 * function - confirm there.
 */
BN_ULONG
bn_mul_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
{
	uint64_t *dst = (uint64_t *)rd;
	uint64_t *src = (uint64_t *)ad;

	return bignum_cmul(num, dst, w, num, src);
}
#endif
| 78 | |||
#ifdef HAVE_BN_MUL_COMBA4
/*
 * bn_mul_comba4() delegates the fixed-size multiplication of ad by bd into
 * rd to s2n-bignum's bignum_mul_4_8_alt().
 *
 * NOTE(review): going by the routine name, the inputs are presumably 4
 * words each and the result 8 words - confirm against bignum_mul_4_8_alt.
 */
void
bn_mul_comba4(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd)
{
	uint64_t *product = (uint64_t *)rd;
	uint64_t *lhs = (uint64_t *)ad;
	uint64_t *rhs = (uint64_t *)bd;

	/* XXX - the non-alt variant may be preferable on CPUs with ADX. */
	bignum_mul_4_8_alt(product, lhs, rhs);
}
#endif
| 87 | |||
#ifdef HAVE_BN_MUL_COMBA8
/*
 * bn_mul_comba8() delegates the fixed-size multiplication of ad by bd into
 * rd to s2n-bignum's bignum_mul_8_16_alt().
 *
 * NOTE(review): going by the routine name, the inputs are presumably 8
 * words each and the result 16 words - confirm against
 * bignum_mul_8_16_alt.
 */
void
bn_mul_comba8(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd)
{
	uint64_t *product = (uint64_t *)rd;
	uint64_t *lhs = (uint64_t *)ad;
	uint64_t *rhs = (uint64_t *)bd;

	/* XXX - the non-alt variant may be preferable on CPUs with ADX. */
	bignum_mul_8_16_alt(product, lhs, rhs);
}
#endif
| 96 | |||
#ifdef HAVE_BN_SQR
/*
 * bn_sqr() squares a into r by calling s2n-bignum's bignum_sqr() with
 * r->d as an r_len-word destination and a->d as an a->top-word source.
 *
 * The ctx argument is not used here, and the function unconditionally
 * returns 1.
 *
 * NOTE(review): r->d is written without any size check in this function;
 * presumably the caller has already expanded r to r_len words - confirm.
 */
int
bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx)
{
	uint64_t *dst = (uint64_t *)r->d;
	uint64_t *src = (uint64_t *)a->d;

	bignum_sqr(r_len, dst, a->top, src);

	return 1;
}
#endif
| 106 | |||
#ifdef HAVE_BN_SQR_COMBA4
/*
 * bn_sqr_comba4() delegates the fixed-size squaring of ad into rd to
 * s2n-bignum's bignum_sqr_4_8_alt().
 *
 * NOTE(review): going by the routine name, the input is presumably 4
 * words and the result 8 words - confirm against bignum_sqr_4_8_alt.
 */
void
bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad)
{
	uint64_t *square = (uint64_t *)rd;
	uint64_t *src = (uint64_t *)ad;	/* cast drops const for the callee */

	/* XXX - the non-alt variant may be preferable on CPUs with ADX. */
	bignum_sqr_4_8_alt(square, src);
}
#endif
| 115 | |||
#ifdef HAVE_BN_SQR_COMBA8
/*
 * bn_sqr_comba8() delegates the fixed-size squaring of ad into rd to
 * s2n-bignum's bignum_sqr_8_16_alt().
 *
 * NOTE(review): going by the routine name, the input is presumably 8
 * words and the result 16 words - confirm against bignum_sqr_8_16_alt.
 */
void
bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad)
{
	uint64_t *square = (uint64_t *)rd;
	uint64_t *src = (uint64_t *)ad;	/* cast drops const for the callee */

	/* XXX - the non-alt variant may be preferable on CPUs with ADX. */
	bignum_sqr_8_16_alt(square, src);
}
#endif
| 124 | |||
#ifdef HAVE_BN_WORD_CLZ
/*
 * bn_word_clz() returns the number of leading zero bits in w, as computed
 * by the assembly routine word_clz(). The routine's result is converted to
 * int on the way out.
 */
int
bn_word_clz(BN_ULONG w)
{
	int leading_zeros = word_clz(w);

	return leading_zeros;
}
#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h deleted file mode 100644 index 927cd75208..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h +++ /dev/null | |||
| @@ -1,109 +0,0 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.14 2024/03/26 06:09:25 jsing Exp $ */ | ||
| 2 | /* | ||
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | ||
| 4 | * | ||
| 5 | * Permission to use, copy, modify, and distribute this software for any | ||
| 6 | * purpose with or without fee is hereby granted, provided that the above | ||
| 7 | * copyright notice and this permission notice appear in all copies. | ||
| 8 | * | ||
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <openssl/bn.h> | ||
| 19 | |||
| 20 | #ifndef HEADER_BN_ARCH_H | ||
| 21 | #define HEADER_BN_ARCH_H | ||
| 22 | |||
| 23 | #ifndef OPENSSL_NO_ASM | ||
| 24 | |||
| 25 | #define HAVE_BN_ADD /* enables the bn_add() wrapper in bn_arch.c */ | ||
| 26 | #define HAVE_BN_ADD_WORDS /* enables the bn_add_words() wrapper in bn_arch.c */ | ||
| 27 | |||
| 28 | #define HAVE_BN_DIV_WORDS /* NOTE(review): no matching wrapper in bn_arch.c; presumably implemented elsewhere - confirm */ | ||
| 29 | |||
| 30 | #define HAVE_BN_MUL_ADD_WORDS /* enables the bn_mul_add_words() wrapper in bn_arch.c */ | ||
| 31 | #define HAVE_BN_MUL_COMBA4 /* enables the bn_mul_comba4() wrapper in bn_arch.c */ | ||
| 32 | #define HAVE_BN_MUL_COMBA8 /* enables the bn_mul_comba8() wrapper in bn_arch.c */ | ||
| 33 | #define HAVE_BN_MUL_WORDS /* enables the bn_mul_words() wrapper in bn_arch.c */ | ||
| 34 | |||
| 35 | #define HAVE_BN_SQR /* enables the bn_sqr() wrapper in bn_arch.c */ | ||
| 36 | #define HAVE_BN_SQR_COMBA4 /* enables the bn_sqr_comba4() wrapper in bn_arch.c */ | ||
| 37 | #define HAVE_BN_SQR_COMBA8 /* enables the bn_sqr_comba8() wrapper in bn_arch.c */ | ||
| 38 | |||
| 39 | #define HAVE_BN_SUB /* enables the bn_sub() wrapper in bn_arch.c */ | ||
| 40 | #define HAVE_BN_SUB_WORDS /* enables the bn_sub_words() wrapper in bn_arch.c */ | ||
| 41 | |||
| 42 | #define HAVE_BN_WORD_CLZ /* enables the bn_word_clz() wrapper in bn_arch.c */ | ||
| 43 | |||
| 44 | #if defined(__GNUC__) | ||
| 45 | |||
| 46 | #define HAVE_BN_DIV_REM_WORDS_INLINE | ||
| 47 | |||
| 48 | static inline void | ||
| 49 | bn_div_rem_words_inline(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q, | ||
| 50 | BN_ULONG *out_r) | ||
| 51 | { | ||
| 52 | BN_ULONG q, r; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * Unsigned division of %rdx:%rax by d with quotient being stored in | ||
| 56 | * %rax and remainder in %rdx. | ||
| 57 | */ | ||
| 58 | __asm__ volatile ("divq %4" | ||
| 59 | : "=a"(q), "=d"(r) | ||
| 60 | : "d"(h), "a"(l), "rm"(d) | ||
| 61 | : "cc"); | ||
| 62 | |||
| 63 | *out_q = q; | ||
| 64 | *out_r = r; | ||
| 65 | } | ||
| 66 | |||
| 67 | #define HAVE_BN_MULW | ||
| 68 | |||
| 69 | static inline void | ||
| 70 | bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0) | ||
| 71 | { | ||
| 72 | BN_ULONG r1, r0; | ||
| 73 | |||
| 74 | /* | ||
| 75 | * Unsigned multiplication of %rax, with the double word result being | ||
| 76 | * stored in %rdx:%rax. | ||
| 77 | */ | ||
| 78 | __asm__ ("mulq %3" | ||
| 79 | : "=d"(r1), "=a"(r0) | ||
| 80 | : "a"(a), "rm"(b) | ||
| 81 | : "cc"); | ||
| 82 | |||
| 83 | *out_r1 = r1; | ||
| 84 | *out_r0 = r0; | ||
| 85 | } | ||
| 86 | |||
| 87 | #define HAVE_BN_SUBW | ||
| 88 | |||
| 89 | static inline void | ||
| 90 | bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0) | ||
| 91 | { | ||
| 92 | BN_ULONG borrow, r0; | ||
| 93 | |||
| 94 | __asm__ ( | ||
| 95 | "subq %3, %1 \n" | ||
| 96 | "setb %b0 \n" | ||
| 97 | "and $1, %0 \n" | ||
| 98 | : "=r"(borrow), "=r"(r0) | ||
| 99 | : "1"(a), "rm"(b) | ||
| 100 | : "cc"); | ||
| 101 | |||
| 102 | *out_borrow = borrow; | ||
| 103 | *out_r0 = r0; | ||
| 104 | } | ||
| 105 | |||
| 106 | #endif /* __GNUC__ */ | ||
| 107 | |||
| 108 | #endif | ||
| 109 | #endif | ||
diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S deleted file mode 100644 index 3926fcd4b0..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S +++ /dev/null | |||
| @@ -1,60 +0,0 @@ | |||
| 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| 2 | // | ||
| 3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
| 4 | // purpose with or without fee is hereby granted, provided that the above | ||
| 5 | // copyright notice and this permission notice appear in all copies. | ||
| 6 | // | ||
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 14 | |||
| 15 | // ---------------------------------------------------------------------------- | ||
| 16 | // Count leading zero bits in a single word | ||
| 17 | // Input a; output function return | ||
| 18 | // | ||
| 19 | // extern uint64_t word_clz (uint64_t a); | ||
| 20 | // | ||
| 21 | // Standard x86-64 ABI: RDI = a, returns RAX | ||
| 22 | // Microsoft x64 ABI: RCX = a, returns RAX | ||
| 23 | // ---------------------------------------------------------------------------- | ||
| 24 | |||
| 25 | #include "s2n_bignum_internal.h" | ||
| 26 | |||
| 27 | .intel_syntax noprefix | ||
| 28 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(word_clz) | ||
| 29 | S2N_BN_SYM_PRIVACY_DIRECTIVE(word_clz) | ||
| 30 | .text | ||
| 31 | |||
S2N_BN_SYMBOL(word_clz):
        _CET_ENDBR

// Under the Microsoft x64 ABI the argument arrives in rcx; save rdi and rsi
// and move the argument into rdi so the body below is shared by both ABIs.

#if WINDOWS_ABI
        push    rdi
        push    rsi
        mov     rdi, rcx
#endif

// Preload the answer for a zero input. Writing edx zero-extends into rdx,
// and mov leaves the flags alone.

        mov     edx, 64

// rax = bsr(a) XOR 63, which equals 63 - bsr(a) because bsr(a) lies in
// 0..63. This is the leading-zero count for every nonzero a.

        bsr     rax, rdi
        xor     rax, 63

// The value above cannot be relied on when a = 0, so substitute 64 there.

        test    rdi, rdi
        cmove   rax, rdx

#if WINDOWS_ABI
        pop     rsi
        pop     rdi
#endif
        ret
| 57 | |||
| 58 | #if defined(__linux__) && defined(__ELF__) | ||
| 59 | .section .note.GNU-stack,"",%progbits | ||
| 60 | #endif | ||
