diff options
Diffstat (limited to 'src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S')
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S | 197 |
1 files changed, 0 insertions, 197 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S deleted file mode 100644 index 54e3f59442..0000000000 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S +++ /dev/null | |||
@@ -1,197 +0,0 @@ | |||
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
2 | // | ||
3 | // Permission to use, copy, modify, and/or distribute this software for any | ||
4 | // purpose with or without fee is hereby granted, provided that the above | ||
5 | // copyright notice and this permission notice appear in all copies. | ||
6 | // | ||
7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
14 | |||
15 | // ---------------------------------------------------------------------------- | ||
16 | // Square z := x^2 | ||
17 | // Input x[n]; output z[k] | ||
18 | // | ||
19 | // extern void bignum_sqr | ||
20 | // (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); | ||
21 | // | ||
22 | // Does the "z := x^2" operation where x is n digits and result z is k. | ||
23 | // Truncates the result in general unless k >= 2 * n | ||
24 | // | ||
25 | // Standard x86-64 ABI: RDI = k, RSI = z, RDX = n, RCX = x | ||
26 | // Microsoft x64 ABI: RCX = k, RDX = z, R8 = n, R9 = x | ||
27 | // ---------------------------------------------------------------------------- | ||
28 | |||
29 | #include "s2n_bignum_internal.h" | ||
30 | |||
31 | .intel_syntax noprefix | ||
32 | S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr) | ||
33 | S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr) | ||
34 | .text | ||
35 | |||
36 | // First three are where arguments come in, but n is moved. | ||
37 | |||
38 | #define p rdi | ||
39 | #define z rsi | ||
40 | #define x rcx | ||
41 | #define n r8 | ||
42 | |||
43 | // These are always local scratch since multiplier result is in these | ||
44 | |||
45 | #define a rax | ||
46 | #define d rdx | ||
47 | |||
48 | // Other variables | ||
49 | |||
50 | #define i rbx | ||
51 | #define ll rbp | ||
52 | #define hh r9 | ||
53 | #define k r10 | ||
54 | #define y r11 | ||
55 | #define htop r12 | ||
56 | #define l r13 | ||
57 | #define h r14 | ||
58 | #define c r15 | ||
59 | |||
60 | // Short versions | ||
61 | |||
62 | #define llshort ebp | ||
63 | |||
64 | S2N_BN_SYMBOL(bignum_sqr): | ||
65 | _CET_ENDBR | ||
66 | |||
67 | #if WINDOWS_ABI | ||
68 | push rdi | ||
69 | push rsi | ||
70 | mov rdi, rcx | ||
71 | mov rsi, rdx | ||
72 | mov rdx, r8 | ||
73 | mov rcx, r9 | ||
74 | #endif | ||
75 | |||
76 | // We use too many registers, and also we need rax:rdx for multiplications | ||
77 | |||
78 | push rbx | ||
79 | push rbp | ||
80 | push r12 | ||
81 | push r13 | ||
82 | push r14 | ||
83 | push r15 | ||
84 | mov n, rdx | ||
85 | |||
86 | // If p = 0 the result is trivial and nothing needs doing | ||
87 | |||
88 | test p, p | ||
89 | jz end | ||
90 | |||
91 | // initialize (hh,ll) = 0 | ||
92 | |||
93 | xor llshort, llshort | ||
94 | xor hh, hh | ||
95 | |||
96 | // Iterate outer loop from k = 0 ... k = p - 1 producing result digits | ||
97 | |||
98 | xor k, k | ||
99 | |||
100 | outerloop: | ||
101 | |||
102 | // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n | ||
103 | // We want to accumulate all x[i] * x[k - i] for bot <= i < top | ||
104 | // For the optimization of squaring we avoid duplication and do | ||
105 | // 2 * x[i] * x[k - i] for i < htop, where htop = MIN ((k+1)/2) n | ||
106 | // Initialize i = bot; in fact just compute bot as i directly. | ||
107 | |||
108 | xor c, c | ||
109 | lea i, [k+1] | ||
110 | mov htop, i | ||
111 | shr htop, 1 | ||
112 | sub i, n | ||
113 | cmovc i, c | ||
114 | cmp htop, n | ||
115 | cmovnc htop, n | ||
116 | |||
117 | // Initialize the three-part local sum (c,h,l); c was already done above | ||
118 | |||
119 | xor l, l | ||
120 | xor h, h | ||
121 | |||
122 | // If htop <= bot then main doubled part of the sum is empty | ||
123 | |||
124 | cmp i, htop | ||
125 | jnc nosumming | ||
126 | |||
127 | // Use a moving pointer for [y] = x[k-i] for the cofactor | ||
128 | |||
129 | mov a, k | ||
130 | sub a, i | ||
131 | lea y, [x+8*a] | ||
132 | |||
133 | // Do the main part of the sum x[i] * x[k - i] for 2 * i < k | ||
134 | |||
135 | innerloop: | ||
136 | mov a, [x+8*i] | ||
137 | mul QWORD PTR [y] | ||
138 | add l, a | ||
139 | adc h, d | ||
140 | adc c, 0 | ||
141 | sub y, 8 | ||
142 | inc i | ||
143 | cmp i, htop | ||
144 | jc innerloop | ||
145 | |||
146 | // Now double it | ||
147 | |||
148 | add l, l | ||
149 | adc h, h | ||
150 | adc c, c | ||
151 | |||
152 | // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term | ||
153 | |||
154 | nosumming: | ||
155 | test k, 1 | ||
156 | jnz innerend | ||
157 | cmp i, n | ||
158 | jnc innerend | ||
159 | |||
160 | mov a, [x+8*i] | ||
161 | mul a | ||
162 | add l, a | ||
163 | adc h, d | ||
164 | adc c, 0 | ||
165 | |||
166 | // Now add the local sum into the global sum, store and shift | ||
167 | |||
168 | innerend: | ||
169 | add l, ll | ||
170 | mov [z+8*k], l | ||
171 | adc h, hh | ||
172 | mov ll, h | ||
173 | adc c, 0 | ||
174 | mov hh, c | ||
175 | |||
176 | inc k | ||
177 | cmp k, p | ||
178 | jc outerloop | ||
179 | |||
180 | // Restore registers and return | ||
181 | |||
182 | end: | ||
183 | pop r15 | ||
184 | pop r14 | ||
185 | pop r13 | ||
186 | pop r12 | ||
187 | pop rbp | ||
188 | pop rbx | ||
189 | #if WINDOWS_ABI | ||
190 | pop rsi | ||
191 | pop rdi | ||
192 | #endif | ||
193 | ret | ||
194 | |||
195 | #if defined(__linux__) && defined(__ELF__) | ||
196 | .section .note.GNU-stack,"",%progbits | ||
197 | #endif | ||