summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S')
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S197
1 files changed, 0 insertions, 197 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
deleted file mode 100644
index 54e3f59442..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
+++ /dev/null
@@ -1,197 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Square z := x^2
17// Input x[n]; output z[k]
18//
19// extern void bignum_sqr
20// (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x);
21//
22// Does the "z := x^2" operation where x is n digits and result z is k.
23// Truncates the result in general unless k >= 2 * n
24//
25// Standard x86-64 ABI: RDI = k, RSI = z, RDX = n, RCX = x
26// Microsoft x64 ABI: RCX = k, RDX = z, R8 = n, R9 = x
27// ----------------------------------------------------------------------------
28
29#include "s2n_bignum_internal.h"
30
31 .intel_syntax noprefix
32 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr)
33 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr)
34 .text
35
36// First three are where arguments come in, but n is moved.
37
38#define p rdi
39#define z rsi
40#define x rcx
41#define n r8
42
43// These are always local scratch since multiplier result is in these
44
45#define a rax
46#define d rdx
47
48// Other variables
49
50#define i rbx
51#define ll rbp
52#define hh r9
53#define k r10
54#define y r11
55#define htop r12
56#define l r13
57#define h r14
58#define c r15
59
60// Short versions
61
62#define llshort ebp
63
64S2N_BN_SYMBOL(bignum_sqr):
65 _CET_ENDBR
66
67#if WINDOWS_ABI
68 push rdi
69 push rsi
70 mov rdi, rcx
71 mov rsi, rdx
72 mov rdx, r8
73 mov rcx, r9
74#endif
75
76// We use too many registers, and also we need rax:rdx for multiplications
77
78 push rbx
79 push rbp
80 push r12
81 push r13
82 push r14
83 push r15
84 mov n, rdx
85
86// If p = 0 the result is trivial and nothing needs doing
87
88 test p, p
89 jz end
90
91// initialize (hh,ll) = 0
92
93 xor llshort, llshort
94 xor hh, hh
95
96// Iterate outer loop from k = 0 ... k = p - 1 producing result digits
97
98 xor k, k
99
100outerloop:
101
102// First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n
103// We want to accumulate all x[i] * x[k - i] for bot <= i < top
104// For the optimization of squaring we avoid duplication and do
105// 2 * x[i] * x[k - i] for i < htop, where htop = MIN ((k+1)/2) n
106// Initialize i = bot; in fact just compute bot as i directly.
107
108 xor c, c
109 lea i, [k+1]
110 mov htop, i
111 shr htop, 1
112 sub i, n
113 cmovc i, c
114 cmp htop, n
115 cmovnc htop, n
116
117// Initialize the three-part local sum (c,h,l); c was already done above
118
119 xor l, l
120 xor h, h
121
122// If htop <= bot then main doubled part of the sum is empty
123
124 cmp i, htop
125 jnc nosumming
126
127// Use a moving pointer for [y] = x[k-i] for the cofactor
128
129 mov a, k
130 sub a, i
131 lea y, [x+8*a]
132
133// Do the main part of the sum x[i] * x[k - i] for 2 * i < k
134
135innerloop:
136 mov a, [x+8*i]
137 mul QWORD PTR [y]
138 add l, a
139 adc h, d
140 adc c, 0
141 sub y, 8
142 inc i
143 cmp i, htop
144 jc innerloop
145
146// Now double it
147
148 add l, l
149 adc h, h
150 adc c, c
151
152// If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term
153
154nosumming:
155 test k, 1
156 jnz innerend
157 cmp i, n
158 jnc innerend
159
160 mov a, [x+8*i]
161 mul a
162 add l, a
163 adc h, d
164 adc c, 0
165
166// Now add the local sum into the global sum, store and shift
167
168innerend:
169 add l, ll
170 mov [z+8*k], l
171 adc h, hh
172 mov ll, h
173 adc c, 0
174 mov hh, c
175
176 inc k
177 cmp k, p
178 jc outerloop
179
180// Restore registers and return
181
182end:
183 pop r15
184 pop r14
185 pop r13
186 pop r12
187 pop rbp
188 pop rbx
189#if WINDOWS_ABI
190 pop rsi
191 pop rdi
192#endif
193 ret
194
195#if defined(__linux__) && defined(__ELF__)
196.section .note.GNU-stack,"",%progbits
197#endif