diff options
Diffstat (limited to 'src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S')
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S | 155 |
1 files changed, 0 insertions, 155 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S deleted file mode 100644 index 25ba17bce2..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S +++ /dev/null | |||
@@ -1,155 +0,0 @@ | |||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
2 | // | ||
3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
4 | // purpose with or without fee is hereby granted, provided that the above | ||
5 | // copyright notice and this permission notice appear in all copies. | ||
6 | // | ||
7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
14 | |||
15 | // ---------------------------------------------------------------------------- | ||
16 | // Multiply-add with single-word multiplier, z := z + c * y | ||
17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | ||
18 | // | ||
19 | // extern uint64_t bignum_cmadd | ||
20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | ||
21 | // | ||
22 | // Does the "z := z + c * y" operation, where y has n digits and the result z has | ||
23 | // p digits (p being the size argument called k above). Truncates the result in general. | ||
24 | // | ||
25 | // The return value is a high/carry word that is meaningful when p = n + 1, or | ||
26 | // more generally when n <= p and the result fits in p + 1 digits. In these | ||
27 | // cases it gives the top digit of the (p + 1)-digit result. | ||
28 | // | ||
29 | // Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX | ||
30 | // Microsoft x64 ABI: RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX | ||
31 | // ---------------------------------------------------------------------------- | ||
32 | |||
33 | #include "s2n_bignum_internal.h" | ||
34 | |||
// Assembler setup for the routine below.
// - .intel_syntax noprefix: operands are written "op dst, src" with bare
//   register names for the rest of the file.
// - The two S2N_BN_SYM_* macros are not defined in this file; presumably they
//   come from s2n_bignum_internal.h and control how the bignum_cmadd symbol is
//   exported -- confirm against that header.
// - .text: the code that follows is placed in the text section.
35 | .intel_syntax noprefix | ||
36 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmadd) | ||
37 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmadd) | ||
38 | .text | ||
39 | |||
// Register roles used by bignum_cmadd (preprocessor aliases).
//
// Argument registers, following the standard x86-64 ABI layout given in the
// file header (the Windows entry sequence moves its arguments into the same
// registers):
//   p  (rdi)  size of z in digits (the argument the prototype calls k); after
//             clamping it is reused as the count of extra tail digits
//   z  (rsi)  pointer to the accumulator / result digits
//   n  (rcx)  size of the multiplicand in digits, clamped to min(p,n)
//   x  (r8)   pointer to the multiplicand digits (the argument called y)
// The multiplier c arrives in rdx and is copied into r9 by the function body,
// because mul overwrites rdx.
40 | #define p rdi | ||
41 | #define z rsi | ||
42 | #define c r9 | ||
43 | #define n rcx | ||
44 | #define x r8 | ||
45 | |||
// Working registers: i is the digit index, h the running high/carry word that
// also becomes the return value.
46 | #define i r10 | ||
47 | #define h r11 | ||
48 | |||
// r is rbx, which the function body saves with push and restores with pop; it
// holds the 0 / all-ones carry mask produced by "sbb r, r" in the main loop.
49 | #define r rbx | ||
50 | |||
// 32-bit views of h and i. Writing a 32-bit register zeroes the upper half of
// the 64-bit register, so these are used for flag-preserving constant loads.
51 | #define hshort r11d | ||
52 | #define ishort r10d | ||
53 | |||
54 | |||
55 | |||
// bignum_cmadd: z := z + c * y for a single-word multiplier c, truncated to
// the p digits of z. The high/carry word is returned in rax.
//
// The body relies throughout on which instructions leave the carry flag (CF)
// alone: mov, inc and dec do not modify CF, whereas add, adc, sbb, sub, mul,
// xor and test do. Do not reorder instructions or replace "mov reg, 0" with
// "xor reg, reg" without re-checking the carry chain.
56 | S2N_BN_SYMBOL(bignum_cmadd): | ||
// _CET_ENDBR is a macro defined outside this file; presumably it emits the
// indirect-branch landing pad when CET is enabled -- confirm in the header.
57 | _CET_ENDBR | ||
58 | |||
// Microsoft x64 entry: save rdi and rsi (restored before ret below) and move
// the arguments into the registers the standard-ABI body expects. Each source
// register is read before it is overwritten, so the order of the moves matters.
// The fifth argument is documented at [RSP+40] on entry; the two 8-byte pushes
// above it lower rsp by 16, hence [rsp+56].
59 | #if WINDOWS_ABI | ||
60 | push rdi | ||
61 | push rsi | ||
62 | mov rdi, rcx | ||
63 | mov rsi, rdx | ||
64 | mov rdx, r8 | ||
65 | mov rcx, r9 | ||
66 | mov r8, [rsp+56] | ||
67 | #endif | ||
68 | |||
69 | // Seems hard to avoid one more register | ||
70 | |||
// rbx is the register aliased as r; it is restored by the pop before ret.
71 | push rbx | ||
72 | |||
73 | // First clamp the input size n := min(p,n) since we can never need to read | ||
74 | // past the p'th term of the input to generate p-digit output. | ||
75 | // Subtract p := p - min(n,p) so it holds the size of the extra tail needed | ||
76 | |||
// cmp sets CF when p < n (unsigned), and cmovc then replaces n by p.
77 | cmp p, n | ||
78 | cmovc n, p | ||
79 | sub p, n | ||
80 | |||
81 | // Initialize high part h = 0; if n = 0 do nothing but return that zero | ||
82 | |||
// Because n is already min(p,n), this early exit also covers p = 0; z is not
// written at all on this path.
83 | xor h, h | ||
84 | test n, n | ||
85 | jz end | ||
86 | |||
87 | // Move c into a safer register as multiplies overwrite rdx | ||
88 | |||
89 | mov c, rdx | ||
90 | |||
91 | // Initialization of the loop: 2^64 * CF + [h,z_0'] = z_0 + c * x_0 | ||
92 | |||
// mul leaves the 128-bit product in rdx:rax. The add into z[0] produces CF;
// the following mov and dec instructions leave CF untouched, so that carry is
// still live at the jz and on arrival at either loop or hightail. Writing
// ishort = 1 sets the full 64-bit index i to 1.
93 | mov rax, [x] | ||
94 | mul c | ||
95 | add [z], rax | ||
96 | mov h, rdx | ||
97 | mov ishort, 1 | ||
98 | dec n | ||
99 | jz hightail | ||
100 | |||
101 | // Main loop, where we always have CF + previous high part h to add in | ||
102 | |||
// Per iteration:
//   adc: h := h + z[i] + CF, producing a new carry
//   sbb: r := 0 - CF, i.e. 0 or all ones, parking that carry across the mul
//        (mul overwrites the flags)
//   mul: rdx:rax := x[i] * c
//   sub: rdx := rdx - r, which adds the parked carry into the high word
//   add: rax := rax + h; its carry-out is the CF consumed by the next adc
//   z[i] := rax, h := rdx
// inc and dec do not modify CF, so the carry from the add survives the loop
// control and reaches either the next adc or hightail.
103 | loop: | ||
104 | adc h, [z+8*i] | ||
105 | sbb r, r | ||
106 | mov rax, [x+8*i] | ||
107 | mul c | ||
108 | sub rdx, r | ||
109 | add rax, h | ||
110 | mov [z+8*i], rax | ||
111 | mov h, rdx | ||
112 | inc i | ||
113 | dec n | ||
114 | jnz loop | ||
115 | |||
// Fold the last outstanding carry into the high word.
116 | hightail: | ||
117 | adc h, 0 | ||
118 | |||
119 | // Propagate the carry all the way to the end with h as extra carry word | ||
120 | |||
// p now holds the number of digits of z beyond the first min(p,n). If there
// are none, h is returned as is.
121 | tail: | ||
122 | test p, p | ||
123 | jz end | ||
124 | |||
// Add h into the first tail digit, then clear h. The clear uses mov rather
// than xor because the carry from the add must survive into the adc
// instructions below; writing hshort zeroes all 64 bits of h.
125 | add [z+8*i], h | ||
126 | mov hshort, 0 | ||
127 | inc i | ||
128 | dec p | ||
129 | jz highend | ||
130 | |||
// h is zero here, so each adc only ripples the carry through one more digit.
131 | tloop: | ||
132 | adc [z+8*i], h | ||
133 | inc i | ||
134 | dec p | ||
135 | jnz tloop | ||
136 | |||
137 | highend: | ||
138 | |||
// h is zero on both paths into highend, so this leaves h equal to the final
// carry out of the tail (0 or 1).
139 | adc h, 0 | ||
140 | |||
141 | // Return the high/carry word | ||
142 | |||
143 | end: | ||
144 | mov rax, h | ||
145 | |||
// Restore the registers saved on entry, in reverse order of the pushes.
146 | pop rbx | ||
147 | #if WINDOWS_ABI | ||
148 | pop rsi | ||
149 | pop rdi | ||
150 | #endif | ||
151 | ret | ||
152 | |||
// On Linux ELF builds only, emit an empty .note.GNU-stack section. GNU
// toolchains conventionally treat this marker as a statement that the object
// does not require an executable stack.
153 | #if defined(__linux__) && defined(__ELF__) | ||
154 | .section .note.GNU-stack,"",%progbits | ||
155 | #endif | ||