summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn/arch/amd64
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/arch/amd64')
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_add.S165
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S155
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S138
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul.S167
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S157
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S244
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S197
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S145
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S242
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bignum_sub.S153
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bn_arch.c131
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/bn_arch.h109
-rw-r--r--src/lib/libcrypto/bn/arch/amd64/word_clz.S60
13 files changed, 0 insertions, 2063 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
deleted file mode 100644
index 5fe4aae7a1..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
+++ /dev/null
@@ -1,165 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Add, z := x + y
17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18//
19// extern uint64_t bignum_add
20// (uint64_t p, uint64_t *z,
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22//
23// Does the z := x + y operation, truncating modulo p words in general and
24// returning a top carry (0 or 1) in the p'th place, only adding the input
25// words below p (as well as m and n respectively) to get the sum and carry.
26//
27// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
28// Microsoft x64 ABI: RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
29// ----------------------------------------------------------------------------
30
31#include "s2n_bignum_internal.h"
32
33 .intel_syntax noprefix
34 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add)
35 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add)
36 .text
37
38#define p rdi
39#define z rsi
40#define m rdx
41#define x rcx
42#define n r8
43#define y r9
44#define i r10
45#define a rax
46
47#define ashort eax
48
49// p, z, m, x, n, y name the six arguments in their System V registers;
50// i is the running word index and a (ashort is its 32-bit view) is scratch.
51S2N_BN_SYMBOL(bignum_add):
52 _CET_ENDBR
53
54#if WINDOWS_ABI
55 push rdi // rdi and rsi are callee-saved under the Microsoft x64 ABI
56 push rsi
57 mov rdi, rcx // move the register arguments into their System V slots
58 mov rsi, rdx
59 mov rdx, r8
60 mov rcx, r9
61 mov r8, [rsp+56] // n: [RSP+40] at entry, plus 16 for the two pushes
62 mov r9, [rsp+64] // y: [RSP+48] at entry, plus 16 for the two pushes
63#endif
64
65// Zero the main index counter for both branches
66
67 xor i, i
68
69// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
70// we'll never need words past the p'th. Can now assume m <= p and n <= p.
71// Then compare the modified m and n and branch accordingly
72
73 cmp p, m
74 cmovc m, p // if p < m then m := p
75 cmp p, n
76 cmovc n, p // if p < n then n := p
77 cmp m, n
78 jc ylonger // m < n: y is the longer operand
79
80// The case where x is longer or of the same size (p >= m >= n)
81
82 sub p, m // p := p - m, the number of words of z above both inputs
83 sub m, n // m := m - n, the number of words only x supplies
84 inc m // biased by one because xtest decrements before testing
85 test n, n // also clears CF ahead of the first adc
86 jz xtest
87xmainloop:
88 mov a, [x+8*i]
89 adc a, [y+8*i] // a := x[i] + y[i] + CF
90 mov [z+8*i],a
91 inc i // inc and dec leave CF untouched for the next adc
92 dec n
93 jnz xmainloop
94 jmp xtest
95xtoploop:
96 mov a, [x+8*i]
97 adc a, 0 // a := x[i] + CF, y has no more words
98 mov [z+8*i],a
99 inc i
100xtest:
101 dec m
102 jnz xtoploop
103 mov ashort, 0 // mov rather than xor so that CF is preserved
104 adc a, 0 // a := carry out of the top input word (0 or 1)
105 test p, p
106 jnz tails // p > 0: the carry is stored as a word of z instead
107#if WINDOWS_ABI
108 pop rsi
109 pop rdi
110#endif
111 ret
112
113// The case where y is longer (p >= n > m)
114
115ylonger:
116
117 sub p, n // p := p - n, the number of words of z above both inputs
118 sub n, m // n := n - m, nonzero here, the number of words only y supplies
119 test m, m // also clears CF ahead of the first adc
120 jz ytoploop
121ymainloop:
122 mov a, [x+8*i]
123 adc a, [y+8*i] // a := x[i] + y[i] + CF
124 mov [z+8*i],a
125 inc i
126 dec m
127 jnz ymainloop
128ytoploop:
129 mov a, [y+8*i]
130 adc a, 0 // a := y[i] + CF, x has no more words
131 mov [z+8*i],a
132 inc i
133 dec n
134 jnz ytoploop
135 mov ashort, 0 // mov rather than xor so that CF is preserved
136 adc a, 0 // a := carry out of the top input word (0 or 1)
137 test p, p
138 jnz tails
139#if WINDOWS_ABI
140 pop rsi
141 pop rdi
142#endif
143 ret
144
145// Adding a non-trivial tail, when p > max(m,n)
146
147tails:
148 mov [z+8*i],a // the carry becomes the next word of z
149 xor a, a // a is now both the zero fill and the returned carry
150 jmp tail
151tailloop:
152 mov [z+8*i],a // zero the remaining words of z
153tail:
154 inc i
155 dec p
156 jnz tailloop
157#if WINDOWS_ABI
158 pop rsi
159 pop rdi
160#endif
161 ret
162
163#if defined(__linux__) && defined(__ELF__)
164.section .note.GNU-stack,"",%progbits
165#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
deleted file mode 100644
index 25ba17bce2..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
+++ /dev/null
@@ -1,155 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Multiply-add with single-word multiplier, z := z + c * y
17// Inputs c, y[n]; outputs function return (carry-out) and z[k]
18//
19// extern uint64_t bignum_cmadd
20// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
21//
22// Does the "z := z + c * y" operation where y is n digits, result z is k.
23// Truncates the result in general.
24//
25// The return value is a high/carry word that is meaningful when k = n + 1, or
26// more generally when n <= k and the result fits in k + 1 digits. In these
27// cases it gives the top digit of the (k + 1)-digit result.
28//
29// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
30// Microsoft x64 ABI: RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
31// ----------------------------------------------------------------------------
32
33#include "s2n_bignum_internal.h"
34
35 .intel_syntax noprefix
36 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmadd)
37 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmadd)
38 .text
39
40#define p rdi
41#define z rsi
42#define c r9
43#define n rcx
44#define x r8
45
46#define i r10
47#define h r11
48
49#define r rbx
50
51#define hshort r11d
52#define ishort r10d
53
54// p is the size the prototype calls k and x is the pointer it calls y.
55// i is the word index, h the running high word and r a carry mask.
56S2N_BN_SYMBOL(bignum_cmadd):
57 _CET_ENDBR
58
59#if WINDOWS_ABI
60 push rdi // rdi and rsi are callee-saved under the Microsoft x64 ABI
61 push rsi
62 mov rdi, rcx // move the register arguments into their System V slots
63 mov rsi, rdx
64 mov rdx, r8
65 mov rcx, r9
66 mov r8, [rsp+56] // y: [RSP+40] at entry, plus 16 for the two pushes
67#endif
68
69// Seems hard to avoid one more register
70
71 push rbx // rbx (alias r) is callee-saved, restored before ret
72
73// First clamp the input size n := min(p,n) since we can never need to read
74// past the p'th term of the input to generate p-digit output.
75// Subtract p := p - min(n,p) so it holds the size of the extra tail needed
76
77 cmp p, n
78 cmovc n, p // if p < n then n := p
79 sub p, n
80
81// Initialize high part h = 0; if n = 0 do nothing but return that zero
82
83 xor h, h
84 test n, n
85 jz end
86
87// Move c into a safer register as multiplies overwrite rdx
88
89 mov c, rdx
90
91// Initialization of the loop: 2^64 * CF + [h,z_0'] = z_0 + c * x_0
92
93 mov rax, [x]
94 mul c // rdx:rax := c * x[0]
95 add [z], rax
96 mov h, rdx
97 mov ishort, 1 // i := 1; mov and the dec below keep CF from the add
98 dec n
99 jz hightail
100
101// Main loop, where we always have CF + previous high part h to add in
102
103loop:
104 adc h, [z+8*i] // h := h + z[i] + CF
105 sbb r, r // r := 0 or all ones, recording the carry out of that adc
106 mov rax, [x+8*i]
107 mul c // rdx:rax := c * x[i]
108 sub rdx, r // add the recorded carry into the high product word
109 add rax, h
110 mov [z+8*i], rax
111 mov h, rdx
112 inc i // inc and dec keep CF from the add for the next adc
113 dec n
114 jnz loop
115
116hightail:
117 adc h, 0 // fold the last carry into the high word
118
119// Propagate the carry all the way to the end with h as extra carry word
120
121tail:
122 test p, p
123 jz end // no tail words: h is returned as it stands
124
125 add [z+8*i], h
126 mov hshort, 0 // h := 0 without disturbing CF
127 inc i
128 dec p
129 jz highend
130
131tloop:
132 adc [z+8*i], h // h is zero here, so this adds only the carry
133 inc i
134 dec p
135 jnz tloop
136
137highend:
138
139 adc h, 0 // h := carry out of the top word of z
140
141// Return the high/carry word
142
143end:
144 mov rax, h
145
146 pop rbx
147#if WINDOWS_ABI
148 pop rsi
149 pop rdi
150#endif
151 ret
152
153#if defined(__linux__) && defined(__ELF__)
154.section .note.GNU-stack,"",%progbits
155#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
deleted file mode 100644
index 12f785d63a..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
+++ /dev/null
@@ -1,138 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Multiply by a single word, z := c * y
17// Inputs c, y[n]; outputs function return (carry-out) and z[k]
18//
19// extern uint64_t bignum_cmul
20// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
21//
22// Does the "z := c * y" operation where y is n digits, result z is k.
23// Truncates the result in general unless k >= n + 1.
24//
25// The return value is a high/carry word that is meaningful when k >= n as
26// giving the high part of the result. Since this is always zero if k > n,
27// it is mainly of interest in the special case k = n, i.e. where the source
28// and destination have the same nominal size, when it gives the extra word
29// of the full result.
30//
31// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
32// Microsoft x64 ABI: RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
33// ----------------------------------------------------------------------------
34
35#include "s2n_bignum_internal.h"
36
37 .intel_syntax noprefix
38 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul)
39 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul)
40 .text
41
42#define p rdi
43#define z rsi
44#define c r9
45#define n rcx
46#define x r8
47
48#define i r10
49#define h r11
50
51// p is the size the prototype calls k and x is the pointer it calls y.
52// i is the word index and h the high word carried between products.
53S2N_BN_SYMBOL(bignum_cmul):
54 _CET_ENDBR
55
56#if WINDOWS_ABI
57 push rdi // rdi and rsi are callee-saved under the Microsoft x64 ABI
58 push rsi
59 mov rdi, rcx // move the register arguments into their System V slots
60 mov rsi, rdx
61 mov rdx, r8
62 mov rcx, r9
63 mov r8, [rsp+56] // y: [RSP+40] at entry, plus 16 for the two pushes
64#endif
65
66// First clamp the input size n := min(p,n) since we can never need to read
67// past the p'th term of the input to generate p-digit output. Now we can
68// assume that n <= p
69
70 cmp p, n
71 cmovc n, p // if p < n then n := p
72
73// Initialize current input/output pointer offset i and high part h.
74// But then if n = 0 skip the multiplication and go to the tail part
75
76 xor h, h
77 xor i, i
78 test n, n
79 jz tail
80
81// Move c into a safer register as multiplies overwrite rdx
82
83 mov c, rdx
84
85// Initialization of the loop: [h,l] = c * x_0
86
87 mov rax, [x]
88 mul c // rdx:rax := c * x[0]
89 mov [z], rax
90 mov h, rdx
91 inc i
92 cmp i, n
93 jz tail // n = 1: only the high word is left to place
94
95// Main loop doing the multiplications
96
97loop:
98 mov rax, [x+8*i]
99 mul c // rdx:rax := c * x[i]
100 add rax, h // add the previous high word into the low half
101 adc rdx, 0
102 mov [z+8*i], rax
103 mov h, rdx
104 inc i
105 cmp i, n
106 jc loop // continue while i < n
107
108// Add a tail when the destination is longer
109
110tail:
111 cmp i, p
112 jnc end // i >= p: no room in z, h is returned as the carry word
113 mov [z+8*i], h // otherwise h becomes the next word of z
114 xor h, h // h is now both the zero fill and the returned value
115 inc i
116 cmp i, p
117 jnc end
118
119tloop:
120 mov [z+8*i], h // zero the remaining words of z
121 inc i
122 cmp i, p
123 jc tloop
124
125// Return the high/carry word
126
127end:
128 mov rax, h
129
130#if WINDOWS_ABI
131 pop rsi
132 pop rdi
133#endif
134 ret
135
136#if defined(__linux__) && defined(__ELF__)
137.section .note.GNU-stack,"",%progbits
138#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
deleted file mode 100644
index a3552679a2..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
+++ /dev/null
@@ -1,167 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Multiply z := x * y
17// Inputs x[m], y[n]; output z[k]
18//
19// extern void bignum_mul
20// (uint64_t k, uint64_t *z,
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22//
23// Does the "z := x * y" operation where x is m digits, y is n, result z is k.
24// Truncates the result in general unless k >= m + n
25//
26// Standard x86-64 ABI: RDI = k, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y
27// Microsoft x64 ABI: RCX = k, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y
28// ----------------------------------------------------------------------------
29
30#include "s2n_bignum_internal.h"
31
32 .intel_syntax noprefix
33 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul)
34 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul)
35 .text
36
37// These aliases name the registers their arguments actually arrive in
38
39#define p rdi
40#define z rsi
41#define n r8
42
43// These do not: m is copied out of rdx, x and y are moving pointers
44
45#define c r15
46#define h r14
47#define l r13
48#define x r12
49#define y r11
50#define i rbx
51#define k r10
52#define m rbp
53
54// These are always local scratch since multiplier result is in these
55
56#define a rax
57#define d rdx
58
59// p is the size the prototype calls k; the alias k is the outer loop
60// counter. The input pointers stay in rcx (x) and r9 (y) throughout.
61S2N_BN_SYMBOL(bignum_mul):
62 _CET_ENDBR
63
64#if WINDOWS_ABI
65 push rdi // rdi and rsi are callee-saved under the Microsoft x64 ABI
66 push rsi
67 mov rdi, rcx // move the register arguments into their System V slots
68 mov rsi, rdx
69 mov rdx, r8
70 mov rcx, r9
71 mov r8, [rsp+56] // n: [RSP+40] at entry, plus 16 for the two pushes
72 mov r9, [rsp+64] // y: [RSP+48] at entry, plus 16 for the two pushes
73#endif
74
75// We use too many registers, and also we need rax:rdx for multiplications
76
77 push rbx
78 push rbp
79 push r12
80 push r13
81 push r14
82 push r15
83 mov m, rdx // free rdx for the multiplier result
84
85// If the result size is zero, do nothing
86// Note that even if either or both inputs has size zero, we can't
87// just give up because we at least need to zero the output array
88// If we did a multiply-add variant, however, then we could
89
90 test p, p
91 jz end
92
93// Set initial 2-part sum to zero (we zero c inside the body)
94
95 xor h,h
96 xor l,l
97
98// Otherwise do outer loop k = 0 ... k = p - 1
99
100 xor k, k
101
102outerloop:
103
104// Zero our carry term first; we eventually want it and a zero is useful now
105// Set a = max 0 (k + 1 - n), i = min (k + 1) m
106// This defines the range a <= j < i for the inner summation
107// Note that since k < p < 2^64 we can assume k + 1 doesn't overflow
108// And since we want to increment it anyway, we might as well do it now
109
110 xor c, c // c = 0
111 inc k // k = k + 1
112
113 mov a, k // a = k + 1
114 sub a, n // a = k + 1 - n
115 cmovc a, c // a = max 0 (k + 1 - n)
116
117 mov i, m // i = m
118 cmp k, m // CF <=> k + 1 < m
119 cmovc i, k // i = min (k + 1) m
120
121// Turn i into a loop count, and skip things if it's <= 0
122// Otherwise set up initial pointers x -> x0[a] and y -> y0[k - a]
123// and then launch into the main inner loop, postdecrementing i
124
125 mov d, k
126 sub d, i // d = (incremented k) - i
127 sub i, a // i = number of products for this result digit
128 jbe innerend // empty range: just emit the pending sum
129 lea x,[rcx+8*a]
130 lea y,[r9+8*d-8] // biased so that [y+8*i] starts at the top cofactor
131
132innerloop:
133 mov rax, [y+8*i]
134 mul QWORD PTR [x]
135 add x, 8 // x walks upward while the index i walks y downward
136 add l, rax
137 adc h, rdx
138 adc c, 0 // (c,h,l) += product
139 dec i
140 jnz innerloop
141
142innerend:
143
144 mov [z], l // emit the finished digit
145 mov l, h // shift the window down one word: (h,l) := (c,h)
146 mov h, c
147 add z, 8
148
149 cmp k, p
150 jc outerloop // continue while the incremented k is below p
151
152end:
153 pop r15
154 pop r14
155 pop r13
156 pop r12
157 pop rbp
158 pop rbx
159#if WINDOWS_ABI
160 pop rsi
161 pop rdi
162#endif
163 ret
164
165#if defined(__linux__) && defined(__ELF__)
166.section .note.GNU-stack,"",%progbits
167#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
deleted file mode 100644
index 70ff69e372..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
+++ /dev/null
@@ -1,157 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Multiply z := x * y
17// Inputs x[4], y[4]; output z[8]
18//
19// extern void bignum_mul_4_8_alt
20// (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]);
21//
22// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
23// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
24// ----------------------------------------------------------------------------
25
26#include "s2n_bignum_internal.h"
27
28 .intel_syntax noprefix
29 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_4_8_alt)
30 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_4_8_alt)
31 .text
32
33// z and x stay in the registers their arguments arrive in
34
35#define z rdi
36#define x rsi
37
38// This is moved from rdx to free it for muls
39
40#define y rcx
41
42// Other variables used as a rotating 3-word window to add terms to
43
44#define t0 r8
45#define t1 r9
46#define t2 r10
47
48// Macro for the key "multiply and add to (c,h,l)" step
49// (c,h,l) += numa * numb; overwrites rax and rdx
50#define combadd(c,h,l,numa,numb) \
51 mov rax, numa; \
52 mul QWORD PTR numb; \
53 add l, rax; \
54 adc h, rdx; \
55 adc c, 0
56
57// A minutely shorter form for when c = 0 initially
58// (with c = 0, "adc c, c" leaves just the carry in c)
59#define combadz(c,h,l,numa,numb) \
60 mov rax, numa; \
61 mul QWORD PTR numb; \
62 add l, rax; \
63 adc h, rdx; \
64 adc c, c
65
66// A short form where we don't expect a top carry
67// (h,l) += numa * numb; overwrites rax and rdx
68#define combads(h,l,numa,numb) \
69 mov rax, numa; \
70 mul QWORD PTR numb; \
71 add l, rax; \
72 adc h, rdx
73
74S2N_BN_SYMBOL(bignum_mul_4_8_alt):
75 _CET_ENDBR
76
77#if WINDOWS_ABI
78 push rdi // rdi and rsi are callee-saved under the Microsoft x64 ABI
79 push rsi
80 mov rdi, rcx // move the register arguments into their System V slots
81 mov rsi, rdx
82 mov rdx, r8
83#endif
84
85// Copy y into a safe register to start with
86
87 mov y, rdx
88
89// Result term 0: x[0]*y[0]
90
91 mov rax, [x]
92 mul QWORD PTR [y]
93
94 mov [z], rax
95 mov t0, rdx
96 xor t1, t1
97
98// Result term 1: x[0]*y[1] + x[1]*y[0]
99
100 xor t2, t2
101 combads(t1,t0,[x],[y+8])
102 combadz(t2,t1,t0,[x+8],[y])
103 mov [z+8], t0
104
105// Result term 2: x[0]*y[2] + x[1]*y[1] + x[2]*y[0]
106
107 xor t0, t0
108 combadz(t0,t2,t1,[x],[y+16])
109 combadd(t0,t2,t1,[x+8],[y+8])
110 combadd(t0,t2,t1,[x+16],[y])
111 mov [z+16], t1
112
113// Result term 3: x[0]*y[3] + x[1]*y[2] + x[2]*y[1] + x[3]*y[0]
114
115 xor t1, t1
116 combadz(t1,t0,t2,[x],[y+24])
117 combadd(t1,t0,t2,[x+8],[y+16])
118 combadd(t1,t0,t2,[x+16],[y+8])
119 combadd(t1,t0,t2,[x+24],[y])
120 mov [z+24], t2
121
122// Result term 4: x[1]*y[3] + x[2]*y[2] + x[3]*y[1]
123
124 xor t2, t2
125 combadz(t2,t1,t0,[x+8],[y+24])
126 combadd(t2,t1,t0,[x+16],[y+16])
127 combadd(t2,t1,t0,[x+24],[y+8])
128 mov [z+32], t0
129
130// Result term 5: x[2]*y[3] + x[3]*y[2]
131
132 xor t0, t0
133 combadz(t0,t2,t1,[x+16],[y+24])
134 combadd(t0,t2,t1,[x+24],[y+16])
135 mov [z+40], t1
136
137// Result term 6: x[3]*y[3]
138
139 xor t1, t1 // t1 is not read again below
140 combads(t0,t2,[x+24],[y+24])
141 mov [z+48], t2
142
143// Result term 7: the remaining high word
144
145 mov [z+56], t0
146
147// Return
148
149#if WINDOWS_ABI
150 pop rsi
151 pop rdi
152#endif
153 ret
154
155#if defined(__linux__) && defined(__ELF__)
156.section .note.GNU-stack,"",%progbits
157#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
deleted file mode 100644
index 066403b074..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
+++ /dev/null
@@ -1,244 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Multiply z := x * y
17// Inputs x[8], y[8]; output z[16]
18//
19// extern void bignum_mul_8_16_alt
20// (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]);
21//
22// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
23// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
24// ----------------------------------------------------------------------------
25
26#include "s2n_bignum_internal.h"
27
28 .intel_syntax noprefix
29 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_8_16_alt)
30 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_8_16_alt)
31 .text
32
33// z and x stay in the registers their arguments arrive in
34
35#define z rdi
36#define x rsi
37
38// This is moved from rdx to free it for muls
39
40#define y rcx
41
42// Other variables used as a rotating 3-word window to add terms to
43
44#define t0 r8
45#define t1 r9
46#define t2 r10
47
48// Macro for the key "multiply and add to (c,h,l)" step
49// (c,h,l) += numa * numb; overwrites rax and rdx
50#define combadd(c,h,l,numa,numb) \
51 mov rax, numa; \
52 mul QWORD PTR numb; \
53 add l, rax; \
54 adc h, rdx; \
55 adc c, 0
56
57// A minutely shorter form for when c = 0 initially
58// (with c = 0, "adc c, c" leaves just the carry in c)
59#define combadz(c,h,l,numa,numb) \
60 mov rax, numa; \
61 mul QWORD PTR numb; \
62 add l, rax; \
63 adc h, rdx; \
64 adc c, c
65
66// A short form where we don't expect a top carry
67// (h,l) += numa * numb; overwrites rax and rdx
68#define combads(h,l,numa,numb) \
69 mov rax, numa; \
70 mul QWORD PTR numb; \
71 add l, rax; \
72 adc h, rdx
73
74S2N_BN_SYMBOL(bignum_mul_8_16_alt):
75 _CET_ENDBR
76
77#if WINDOWS_ABI
78 push rdi // rdi and rsi are callee-saved under the Microsoft x64 ABI
79 push rsi
80 mov rdi, rcx // move the register arguments into their System V slots
81 mov rsi, rdx
82 mov rdx, r8
83#endif
84
85// Copy y into a safe register to start with
86
87 mov y, rdx
88
89// Result term 0
90
91 mov rax, [x]
92 mul QWORD PTR [y]
93
94 mov [z], rax
95 mov t0, rdx
96 xor t1, t1
97
98// Result term 1
99
100 xor t2, t2
101 combads(t1,t0,[x],[y+8])
102 combadz(t2,t1,t0,[x+8],[y])
103 mov [z+8], t0
104
105// Result term 2
106
107 xor t0, t0
108 combadz(t0,t2,t1,[x],[y+16])
109 combadd(t0,t2,t1,[x+8],[y+8])
110 combadd(t0,t2,t1,[x+16],[y])
111 mov [z+16], t1
112
113// Result term 3
114
115 xor t1, t1
116 combadz(t1,t0,t2,[x],[y+24])
117 combadd(t1,t0,t2,[x+8],[y+16])
118 combadd(t1,t0,t2,[x+16],[y+8])
119 combadd(t1,t0,t2,[x+24],[y])
120 mov [z+24], t2
121
122// Result term 4
123
124 xor t2, t2
125 combadz(t2,t1,t0,[x],[y+32])
126 combadd(t2,t1,t0,[x+8],[y+24])
127 combadd(t2,t1,t0,[x+16],[y+16])
128 combadd(t2,t1,t0,[x+24],[y+8])
129 combadd(t2,t1,t0,[x+32],[y])
130 mov [z+32], t0
131
132// Result term 5
133
134 xor t0, t0
135 combadz(t0,t2,t1,[x],[y+40])
136 combadd(t0,t2,t1,[x+8],[y+32])
137 combadd(t0,t2,t1,[x+16],[y+24])
138 combadd(t0,t2,t1,[x+24],[y+16])
139 combadd(t0,t2,t1,[x+32],[y+8])
140 combadd(t0,t2,t1,[x+40],[y])
141 mov [z+40], t1
142
143// Result term 6
144
145 xor t1, t1
146 combadz(t1,t0,t2,[x],[y+48])
147 combadd(t1,t0,t2,[x+8],[y+40])
148 combadd(t1,t0,t2,[x+16],[y+32])
149 combadd(t1,t0,t2,[x+24],[y+24])
150 combadd(t1,t0,t2,[x+32],[y+16])
151 combadd(t1,t0,t2,[x+40],[y+8])
152 combadd(t1,t0,t2,[x+48],[y])
153 mov [z+48], t2
154
155// Result term 7
156
157 xor t2, t2
158 combadz(t2,t1,t0,[x],[y+56])
159 combadd(t2,t1,t0,[x+8],[y+48])
160 combadd(t2,t1,t0,[x+16],[y+40])
161 combadd(t2,t1,t0,[x+24],[y+32])
162 combadd(t2,t1,t0,[x+32],[y+24])
163 combadd(t2,t1,t0,[x+40],[y+16])
164 combadd(t2,t1,t0,[x+48],[y+8])
165 combadd(t2,t1,t0,[x+56],[y])
166 mov [z+56], t0
167
168// Result term 8
169
170 xor t0, t0
171 combadz(t0,t2,t1,[x+8],[y+56])
172 combadd(t0,t2,t1,[x+16],[y+48])
173 combadd(t0,t2,t1,[x+24],[y+40])
174 combadd(t0,t2,t1,[x+32],[y+32])
175 combadd(t0,t2,t1,[x+40],[y+24])
176 combadd(t0,t2,t1,[x+48],[y+16])
177 combadd(t0,t2,t1,[x+56],[y+8])
178 mov [z+64], t1
179
180// Result term 9
181
182 xor t1, t1
183 combadz(t1,t0,t2,[x+16],[y+56])
184 combadd(t1,t0,t2,[x+24],[y+48])
185 combadd(t1,t0,t2,[x+32],[y+40])
186 combadd(t1,t0,t2,[x+40],[y+32])
187 combadd(t1,t0,t2,[x+48],[y+24])
188 combadd(t1,t0,t2,[x+56],[y+16])
189 mov [z+72], t2
190
191// Result term 10
192
193 xor t2, t2
194 combadz(t2,t1,t0,[x+24],[y+56])
195 combadd(t2,t1,t0,[x+32],[y+48])
196 combadd(t2,t1,t0,[x+40],[y+40])
197 combadd(t2,t1,t0,[x+48],[y+32])
198 combadd(t2,t1,t0,[x+56],[y+24])
199 mov [z+80], t0
200
201// Result term 11
202
203 xor t0, t0
204 combadz(t0,t2,t1,[x+32],[y+56])
205 combadd(t0,t2,t1,[x+40],[y+48])
206 combadd(t0,t2,t1,[x+48],[y+40])
207 combadd(t0,t2,t1,[x+56],[y+32])
208 mov [z+88], t1
209
210// Result term 12
211
212 xor t1, t1
213 combadz(t1,t0,t2,[x+40],[y+56])
214 combadd(t1,t0,t2,[x+48],[y+48])
215 combadd(t1,t0,t2,[x+56],[y+40])
216 mov [z+96], t2
217
218// Result term 13
219
220 xor t2, t2
221 combadz(t2,t1,t0,[x+48],[y+56])
222 combadd(t2,t1,t0,[x+56],[y+48])
223 mov [z+104], t0
224
225// Result term 14
226
227 combads(t2,t1,[x+56],[y+56])
228 mov [z+112], t1
229
230// Result term 15: the remaining high word
231
232 mov [z+120], t2
233
234// Return
235
236#if WINDOWS_ABI
237 pop rsi
238 pop rdi
239#endif
240 ret
241
242#if defined(__linux__) && defined(__ELF__)
243.section .note.GNU-stack,"",%progbits
244#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
deleted file mode 100644
index 54e3f59442..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
+++ /dev/null
@@ -1,197 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Square z := x^2
17// Input x[n]; output z[k]
18//
19// extern void bignum_sqr
20// (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x);
21//
22// Does the "z := x^2" operation where x is n digits and result z is k.
23// Truncates the result in general unless k >= 2 * n
24//
25// Standard x86-64 ABI: RDI = k, RSI = z, RDX = n, RCX = x
26// Microsoft x64 ABI: RCX = k, RDX = z, R8 = n, R9 = x
27// ----------------------------------------------------------------------------
28
29#include "s2n_bignum_internal.h"
30
31 .intel_syntax noprefix
32 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr)
33 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr)
34 .text
35
36// p, z and x are where those arguments come in; n is moved from rdx to r8.
37// (p is the size the prototype calls k; the alias k below is a loop counter.)
38#define p rdi
39#define z rsi
40#define x rcx
41#define n r8
42
43// These are always local scratch since multiplier result is in these
44
45#define a rax
46#define d rdx
47
48// Other variables: (hh,ll) is the sum carried between result digits,
49// (c,h,l) the sum for the current digit, y a pointer to the cofactor.
50#define i rbx
51#define ll rbp
52#define hh r9
53#define k r10
54#define y r11
55#define htop r12
56#define l r13
57#define h r14
58#define c r15
59
60// Short versions
61
62#define llshort ebp
63
64S2N_BN_SYMBOL(bignum_sqr):
65 _CET_ENDBR
66
67#if WINDOWS_ABI
68 push rdi // rdi and rsi are callee-saved under the Microsoft x64 ABI
69 push rsi
70 mov rdi, rcx // move the register arguments into their System V slots
71 mov rsi, rdx
72 mov rdx, r8
73 mov rcx, r9
74#endif
75
76// We use too many registers, and also we need rax:rdx for multiplications
77
78 push rbx
79 push rbp
80 push r12
81 push r13
82 push r14
83 push r15
84 mov n, rdx // free rdx for the multiplier result
85
86// If p = 0 the result is trivial and nothing needs doing
87
88 test p, p
89 jz end
90
91// initialize (hh,ll) = 0
92
93 xor llshort, llshort
94 xor hh, hh
95
96// Iterate outer loop from k = 0 ... k = p - 1 producing result digits
97
98 xor k, k
99
100outerloop:
101
102// First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n
103// We want to accumulate all x[i] * x[k - i] for bot <= i < top
104// For the optimization of squaring we avoid duplication and do
105// 2 * x[i] * x[k - i] for i < htop, where htop = MIN ((k+1)/2) n
106// Initialize i = bot; in fact just compute bot as i directly.
107
108 xor c, c
109 lea i, [k+1]
110 mov htop, i
111 shr htop, 1 // htop = (k + 1) / 2
112 sub i, n
113 cmovc i, c // i = max 0 (k + 1 - n); c is zero here
114 cmp htop, n
115 cmovnc htop, n // htop = min ((k + 1) / 2) n
116
117// Initialize the three-part local sum (c,h,l); c was already done above
118
119 xor l, l
120 xor h, h
121
122// If htop <= bot then main doubled part of the sum is empty
123
124 cmp i, htop
125 jnc nosumming
126
127// Use a moving pointer for [y] = x[k-i] for the cofactor
128
129 mov a, k
130 sub a, i
131 lea y, [x+8*a]
132
133// Do the main part of the sum x[i] * x[k - i] for 2 * i < k
134
135innerloop:
136 mov a, [x+8*i]
137 mul QWORD PTR [y]
138 add l, a
139 adc h, d
140 adc c, 0 // (c,h,l) += x[i] * [y]
141 sub y, 8 // the cofactor index falls as i rises
142 inc i
143 cmp i, htop
144 jc innerloop
145
146// Now double it
147
148 add l, l
149 adc h, h
150 adc c, c
151
152// If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term
153
154nosumming:
155 test k, 1
156 jnz innerend // k odd: there is no square term
157 cmp i, n
158 jnc innerend // i >= n: there is no such input word
159
160 mov a, [x+8*i]
161 mul a // d:a := x[i]^2
162 add l, a
163 adc h, d
164 adc c, 0
165
166// Now add the local sum into the global sum, store and shift
167
168innerend:
169 add l, ll
170 mov [z+8*k], l // the moves here leave CF intact between add and adc
171 adc h, hh
172 mov ll, h
173 adc c, 0
174 mov hh, c
175
176 inc k
177 cmp k, p
178 jc outerloop // continue while k < p
179
180// Restore registers and return
181
182end:
183 pop r15
184 pop r14
185 pop r13
186 pop r12
187 pop rbp
188 pop rbx
189#if WINDOWS_ABI
190 pop rsi
191 pop rdi
192#endif
193 ret
194
195#if defined(__linux__) && defined(__ELF__)
196.section .note.GNU-stack,"",%progbits
197#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
deleted file mode 100644
index 7c534ae907..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
+++ /dev/null
@@ -1,145 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Square, z := x^2
17// Input x[4]; output z[8]
18//
19// extern void bignum_sqr_4_8_alt
20// (uint64_t z[static 8], uint64_t x[static 4]);
21//
22// Standard x86-64 ABI: RDI = z, RSI = x
23// Microsoft x64 ABI: RCX = z, RDX = x
24// ----------------------------------------------------------------------------
25
26#include "s2n_bignum_internal.h"
27
28 .intel_syntax noprefix
29 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_4_8_alt)
30 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_4_8_alt)
31 .text
32
33// Input arguments: z = destination (words [z] .. [z+56] are stored), x = source (words [x] .. [x+24] are loaded)
34
35#define z rdi
36#define x rsi
37
38// Other variables used as a rotating 3-word window to add terms to; once a result word has been stored its register is reused as a later window word
39
40#define t0 rcx
41#define t1 r8
42#define t2 r9
43
44// Macro for the key "multiply and add to (c,h,l)" step, for square term: (c,h,l) += numa^2, with rax and rdx as scratch
45
46#define combadd1(c,h,l,numa) \
47 mov rax, numa; \
48 mul rax; \
49 add l, rax; \
50 adc h, rdx; \
51 adc c, 0
52
53// A short form where we don't expect a top carry: (h,l) += numa^2, any carry out of h is not recorded
54
55#define combads(h,l,numa) \
56 mov rax, numa; \
57 mul rax; \
58 add l, rax; \
59 adc h, rdx
60
61// A version doubling before adding, for non-square terms: (c,h,l) += 2 * numa * numb; the bit shifted out of rdx by the doubling is added into c
62
63#define combadd2(c,h,l,numa,numb) \
64 mov rax, numa; \
65 mul QWORD PTR numb; \
66 add rax, rax; \
67 adc rdx, rdx; \
68 adc c, 0; \
69 add l, rax; \
70 adc h, rdx; \
71 adc c, 0
72
// Entry point: computes the 8-word square of the 4-word input one result
// word ("column") at a time, from least to most significant.
73S2N_BN_SYMBOL(bignum_sqr_4_8_alt):
74 _CET_ENDBR
75
// Microsoft x64 ABI only: save rdi/rsi, then move the arguments from rcx/rdx
// into the registers the rest of the code uses for z and x.
76#if WINDOWS_ABI
77 push rdi
78 push rsi
79 mov rdi, rcx
80 mov rsi, rdx
81#endif
82
83// Result term 0: store the low word of x[0]^2 at [z]; the high word seeds t0 and t1 is cleared
84
85 mov rax, [x]
86 mul rax
87
88 mov [z], rax
89 mov t0, rdx
90 xor t1, t1
91
92// Result term 1: (t2,t1,t0) += 2 * x[0] * x[1], then store t0 at [z+8]
93
94 xor t2, t2
95 combadd2(t2,t1,t0,[x],[x+8])
96 mov [z+8], t0
97
98// Result term 2: (t0,t2,t1) += x[1]^2 + 2 * x[0] * x[2], then store t1 at [z+16]
99
100 xor t0, t0
101 combadd1(t0,t2,t1,[x+8])
102 combadd2(t0,t2,t1,[x],[x+16])
103 mov [z+16], t1
104
105// Result term 3: (t1,t0,t2) += 2 * x[0] * x[3] + 2 * x[1] * x[2], then store t2 at [z+24]
106
107 xor t1, t1
108 combadd2(t1,t0,t2,[x],[x+24])
109 combadd2(t1,t0,t2,[x+8],[x+16])
110 mov [z+24], t2
111
112// Result term 4: (t2,t1,t0) += 2 * x[1] * x[3] + x[2]^2, then store t0 at [z+32]
113
114 xor t2, t2
115 combadd2(t2,t1,t0,[x+8],[x+24])
116 combadd1(t2,t1,t0,[x+16])
117 mov [z+32], t0
118
119// Result term 5: (t0,t2,t1) += 2 * x[2] * x[3], then store t1 at [z+40]
120
121 xor t0, t0
122 combadd2(t0,t2,t1,[x+16],[x+24])
123 mov [z+40], t1
124
125// Result term 6: (t0,t2) += x[3]^2, then store t2 at [z+48]. NOTE(review): t1 is cleared here but is not read again before the return, so this xor looks redundant
126
127 xor t1, t1
128 combads(t0,t2,[x+24])
129 mov [z+48], t2
130
131// Result term 7: the remaining top window word t0 is stored at [z+56]
132
133 mov [z+56], t0
134
135// Return (under the Microsoft ABI, first restore the rsi and rdi saved on entry)
136
137#if WINDOWS_ABI
138 pop rsi
139 pop rdi
140#endif
141 ret
142
143#if defined(__linux__) && defined(__ELF__)
144.section .note.GNU-stack,"",%progbits
145#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
deleted file mode 100644
index ac0b6f96c2..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
+++ /dev/null
@@ -1,242 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Square, z := x^2
17// Input x[8]; output z[16]
18//
19// extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]);
20//
21// Standard x86-64 ABI: RDI = z, RSI = x
22// Microsoft x64 ABI: RCX = z, RDX = x
23// ----------------------------------------------------------------------------
24
25#include "s2n_bignum_internal.h"
26
27 .intel_syntax noprefix
28 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_8_16_alt)
29 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_8_16_alt)
30 .text
31
32// Input arguments: z = destination (words [z] .. [z+120] are stored), x = source (words [x] .. [x+56] are loaded)
33
34#define z rdi
35#define x rsi
36
37// Other variables used as a rotating 3-word window to add terms to; once a result word has been stored its register is reused as a later window word
38
39#define t0 r8
40#define t1 r9
41#define t2 r10
42
43// Additional temporaries for local windows to share doublings: the non-square products of one result term are summed in (c,u1,u0) and doubled once by doubladd
44
45#define u0 rcx
46#define u1 r11
47
48// Macro for the key "multiply and add to (c,h,l)" step: (c,h,l) += numa * numb, with rax and rdx as scratch
49
50#define combadd(c,h,l,numa,numb) \
51 mov rax, numa; \
52 mul QWORD PTR numb; \
53 add l, rax; \
54 adc h, rdx; \
55 adc c, 0
56
57// Set up initial window (c,h,l) = numa * numb, with c zeroed
58
59#define combaddz(c,h,l,numa,numb) \
60 mov rax, numa; \
61 mul QWORD PTR numb; \
62 xor c, c; \
63 mov l, rax; \
64 mov h, rdx
65
66// Doubling step (c,h,l) = 2 * (c,hh,ll) + (0,h,l); hh and ll are overwritten with their doubled values
67
68#define doubladd(c,h,l,hh,ll) \
69 add ll, ll; \
70 adc hh, hh; \
71 adc c, c; \
72 add l, ll; \
73 adc h, hh; \
74 adc c, 0
75
76// Square term incorporation (c,h,l) += numa^2
77
78#define combadd1(c,h,l,numa) \
79 mov rax, numa; \
80 mul rax; \
81 add l, rax; \
82 adc h, rdx; \
83 adc c, 0
84
85// A short form where we don't expect a top carry: (h,l) += numa^2, any carry out of h is not recorded
86
87#define combads(h,l,numa) \
88 mov rax, numa; \
89 mul rax; \
90 add l, rax; \
91 adc h, rdx
92
93// A version doubling directly before adding, for single non-square terms: (c,h,l) += 2 * numa * numb; the bit shifted out of rdx by the doubling is added into c
94
95#define combadd2(c,h,l,numa,numb) \
96 mov rax, numa; \
97 mul QWORD PTR numb; \
98 add rax, rax; \
99 adc rdx, rdx; \
100 adc c, 0; \
101 add l, rax; \
102 adc h, rdx; \
103 adc c, 0
104
// Entry point: computes the 16-word square of the 8-word input one result
// word ("column") at a time, from least to most significant.
105S2N_BN_SYMBOL(bignum_sqr_8_16_alt):
106 _CET_ENDBR
107
// Microsoft x64 ABI only: save rdi/rsi, then move the arguments from rcx/rdx
// into the registers the rest of the code uses for z and x.
108#if WINDOWS_ABI
109 push rdi
110 push rsi
111 mov rdi, rcx
112 mov rsi, rdx
113#endif
114
115// Result term 0: store the low word of x[0]^2 at [z]; the high word seeds t0 and t1 is cleared
116
117 mov rax, [x]
118 mul rax
119
120 mov [z], rax
121 mov t0, rdx
122 xor t1, t1
123
124// Result term 1: (t2,t1,t0) += 2 * x[0] * x[1], then store t0 at [z+8]
125
126 xor t2, t2
127 combadd2(t2,t1,t0,[x],[x+8])
128 mov [z+8], t0
129
130// Result term 2: (t0,t2,t1) += x[1]^2 + 2 * x[0] * x[2], then store t1 at [z+16]
131
132 xor t0, t0
133 combadd1(t0,t2,t1,[x+8])
134 combadd2(t0,t2,t1,[x],[x+16])
135 mov [z+16], t1
136
137// Result term 3: (t1,u1,u0) = x[0]*x[3] + x[1]*x[2], doubled and added to (t0,t2); store t2 at [z+24]
138
139 combaddz(t1,u1,u0,[x],[x+24])
140 combadd(t1,u1,u0,[x+8],[x+16])
141 doubladd(t1,t0,t2,u1,u0)
142 mov [z+24], t2
143
144// Result term 4: doubled x[0]*x[4] + x[1]*x[3], plus x[2]^2; store t0 at [z+32]
145
146 combaddz(t2,u1,u0,[x],[x+32])
147 combadd(t2,u1,u0,[x+8],[x+24])
148 doubladd(t2,t1,t0,u1,u0)
149 combadd1(t2,t1,t0,[x+16])
150 mov [z+32], t0
151
152// Result term 5: doubled x[0]*x[5] + x[1]*x[4] + x[2]*x[3]; store t1 at [z+40]
153
154 combaddz(t0,u1,u0,[x],[x+40])
155 combadd(t0,u1,u0,[x+8],[x+32])
156 combadd(t0,u1,u0,[x+16],[x+24])
157 doubladd(t0,t2,t1,u1,u0)
158 mov [z+40], t1
159
160// Result term 6: doubled x[0]*x[6] + x[1]*x[5] + x[2]*x[4], plus x[3]^2; store t2 at [z+48]
161
162 combaddz(t1,u1,u0,[x],[x+48])
163 combadd(t1,u1,u0,[x+8],[x+40])
164 combadd(t1,u1,u0,[x+16],[x+32])
165 doubladd(t1,t0,t2,u1,u0)
166 combadd1(t1,t0,t2,[x+24])
167 mov [z+48], t2
168
169// Result term 7: doubled x[0]*x[7] + x[1]*x[6] + x[2]*x[5] + x[3]*x[4]; store t0 at [z+56]
170
171 combaddz(t2,u1,u0,[x],[x+56])
172 combadd(t2,u1,u0,[x+8],[x+48])
173 combadd(t2,u1,u0,[x+16],[x+40])
174 combadd(t2,u1,u0,[x+24],[x+32])
175 doubladd(t2,t1,t0,u1,u0)
176 mov [z+56], t0
177
178// Result term 8: doubled x[1]*x[7] + x[2]*x[6] + x[3]*x[5], plus x[4]^2; store t1 at [z+64]
179
180 combaddz(t0,u1,u0,[x+8],[x+56])
181 combadd(t0,u1,u0,[x+16],[x+48])
182 combadd(t0,u1,u0,[x+24],[x+40])
183 doubladd(t0,t2,t1,u1,u0)
184 combadd1(t0,t2,t1,[x+32])
185 mov [z+64], t1
186
187// Result term 9: doubled x[2]*x[7] + x[3]*x[6] + x[4]*x[5]; store t2 at [z+72]
188
189 combaddz(t1,u1,u0,[x+16],[x+56])
190 combadd(t1,u1,u0,[x+24],[x+48])
191 combadd(t1,u1,u0,[x+32],[x+40])
192 doubladd(t1,t0,t2,u1,u0)
193 mov [z+72], t2
194
195// Result term 10: doubled x[3]*x[7] + x[4]*x[6], plus x[5]^2; store t0 at [z+80]
196
197 combaddz(t2,u1,u0,[x+24],[x+56])
198 combadd(t2,u1,u0,[x+32],[x+48])
199 doubladd(t2,t1,t0,u1,u0)
200 combadd1(t2,t1,t0,[x+40])
201 mov [z+80], t0
202
203// Result term 11: doubled x[4]*x[7] + x[5]*x[6]; store t1 at [z+88]
204
205 combaddz(t0,u1,u0,[x+32],[x+56])
206 combadd(t0,u1,u0,[x+40],[x+48])
207 doubladd(t0,t2,t1,u1,u0)
208 mov [z+88], t1
209
210// Result term 12: (t1,t0,t2) += 2 * x[5] * x[7] + x[6]^2; store t2 at [z+96]
211
212 xor t1, t1
213 combadd2(t1,t0,t2,[x+40],[x+56])
214 combadd1(t1,t0,t2,[x+48])
215 mov [z+96], t2
216
217// Result term 13: (t2,t1,t0) += 2 * x[6] * x[7]; store t0 at [z+104]
218
219 xor t2, t2
220 combadd2(t2,t1,t0,[x+48],[x+56])
221 mov [z+104], t0
222
223// Result term 14: (t2,t1) += x[7]^2; store t1 at [z+112]
224
225 combads(t2,t1,[x+56])
226 mov [z+112], t1
227
228// Result term 15: the remaining top window word t2 is stored at [z+120]
229
230 mov [z+120], t2
231
232// Return (under the Microsoft ABI, first restore the rsi and rdi saved on entry)
233
234#if WINDOWS_ABI
235 pop rsi
236 pop rdi
237#endif
238 ret
239
240#if defined(__linux__) && defined(__ELF__)
241.section .note.GNU-stack,"",%progbits
242#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
deleted file mode 100644
index 3ff8a30510..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
+++ /dev/null
@@ -1,153 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Subtract, z := x - y
17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18//
19// extern uint64_t bignum_sub
20// (uint64_t p, uint64_t *z,
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22//
23// Does the z := x - y operation, truncating modulo p words in general and
24// returning a top borrow (0 or 1) in the p'th place, only subtracting input
25// words below p (as well as m and n respectively) to get the diff and borrow.
26//
27// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
28// Microsoft x64 ABI: RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
29// ----------------------------------------------------------------------------
30
31#include "s2n_bignum_internal.h"
32
33 .intel_syntax noprefix
34 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub)
35 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub)
36 .text
37
// Register roles: p/m/n are word counts, z/x/y the matching word pointers,
// i is the shared word index and a the working word (also the return value).
38#define p rdi
39#define z rsi
40#define m rdx
41#define x rcx
42#define n r8
43#define y r9
44#define i r10
45#define a rax
46
// 32-bit view of a, used to zero a with a plain mov (see ytoploop)
47#define ashort eax
48
49
50
// Entry point: z := x - y over p words, returning the final borrow in rax.
51S2N_BN_SYMBOL(bignum_sub):
52 _CET_ENDBR
53
// Microsoft x64 ABI only: save rdi/rsi and shuffle the arguments into the
// registers used below. The two pushes moved rsp down by 16, so the stack
// arguments documented at [RSP+40] and [RSP+48] are now read from
// [rsp+56] and [rsp+64].
54#if WINDOWS_ABI
55 push rdi
56 push rsi
57 mov rdi, rcx
58 mov rsi, rdx
59 mov rdx, r8
60 mov rcx, r9
61 mov r8, [rsp+56]
62 mov r9, [rsp+64]
63#endif
64
65// Zero the main index counter for both branches
66
67 xor i, i
68
69// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
70// we'll never need words past the p'th. Can now assume m <= p and n <= p.
71// Then compare the modified m and n and branch accordingly
72
73 cmp p, m
74 cmovc m, p
75 cmp p, n
76 cmovc n, p
77 cmp m, n
78 jc ylonger
79
80// The case where x is longer or of the same size (p >= m >= n)
// From here p = number of tail words after x ends, and m = (m - n) + 1 so
// that the "dec m / jnz" at xtest runs xtoploop exactly m - n times.
// "test n, n" also clears the carry flag, so the first sbb sees no borrow.
81
82 sub p, m
83 sub m, n
84 inc m
85 test n, n
86 jz xtest
// z[i] = x[i] - y[i] - borrow for the n words both inputs have; inc and dec
// leave the carry flag alone, so the borrow is carried between iterations.
87xmainloop:
88 mov a, [x+8*i]
89 sbb a, [y+8*i]
90 mov [z+8*i],a
91 inc i
92 dec n
93 jnz xmainloop
94 jmp xtest
// z[i] = x[i] - borrow for the words of x beyond the end of y
95xtoploop:
96 mov a, [x+8*i]
97 sbb a, 0
98 mov [z+8*i],a
99 inc i
100xtest:
101 dec m
102 jnz xtoploop
// a = 0 if there is no outstanding borrow, all ones if there is
103 sbb a, a
104 test p, p
105 jz tailskip
// Fill the remaining p words of z with that mask (shared with the ylonger path)
106tailloop:
107 mov [z+8*i],a
108 inc i
109 dec p
110 jnz tailloop
// Turn the 0 / all-ones mask into the 0 / 1 return value
111tailskip:
112 neg a
113#if WINDOWS_ABI
114 pop rsi
115 pop rdi
116#endif
117 ret
118
119// The case where y is longer (p >= n > m)
// From here p = number of tail words after y ends and n = n - m >= 1.
// "test m, m" also clears the carry flag for the first sbb.
120
121ylonger:
122
123 sub p, n
124 sub n, m
125 test m, m
126 jz ytoploop
// z[i] = x[i] - y[i] - borrow for the m words both inputs have
127ymainloop:
128 mov a, [x+8*i]
129 sbb a, [y+8*i]
130 mov [z+8*i],a
131 inc i
132 dec m
133 jnz ymainloop
// z[i] = 0 - y[i] - borrow for the words of y beyond the end of x; a is zeroed
// with mov rather than xor so the carry flag holding the borrow is preserved.
134ytoploop:
135 mov ashort, 0
136 sbb a, [y+8*i]
137 mov [z+8*i],a
138 inc i
139 dec n
140 jnz ytoploop
// a = borrow mask as above; with tail words left, reuse tailloop (which also
// performs the final neg and return), otherwise finish here.
141 sbb a, a
142 test p, p
143 jnz tailloop
144 neg a
145#if WINDOWS_ABI
146 pop rsi
147 pop rdi
148#endif
149 ret
150
151#if defined(__linux__) && defined(__ELF__)
152.section .note.GNU-stack,"",%progbits
153#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c b/src/lib/libcrypto/bn/arch/amd64/bn_arch.c
deleted file mode 100644
index a377a05681..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c
+++ /dev/null
@@ -1,131 +0,0 @@
1/* $OpenBSD: bn_arch.c,v 1.7 2023/06/24 16:01:44 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#include "bn_arch.h"
21#include "bn_local.h"
22#include "s2n_bignum.h"
23
24#ifdef HAVE_BN_ADD
25BN_ULONG
26bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
27 int b_len)
28{
29 return bignum_add(r_len, (uint64_t *)r, a_len, (uint64_t *)a,
30 b_len, (uint64_t *)b);
31}
32#endif
33
34
35#ifdef HAVE_BN_ADD_WORDS
36BN_ULONG
37bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
38{
39 return bignum_add(n, (uint64_t *)rd, n, (uint64_t *)ad, n,
40 (uint64_t *)bd);
41}
42#endif
43
44#ifdef HAVE_BN_SUB
45BN_ULONG
46bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
47 int b_len)
48{
49 return bignum_sub(r_len, (uint64_t *)r, a_len, (uint64_t *)a,
50 b_len, (uint64_t *)b);
51}
52#endif
53
54#ifdef HAVE_BN_SUB_WORDS
55BN_ULONG
56bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
57{
58 return bignum_sub(n, (uint64_t *)rd, n, (uint64_t *)ad, n,
59 (uint64_t *)bd);
60}
61#endif
62
63#ifdef HAVE_BN_MUL_ADD_WORDS
64BN_ULONG
65bn_mul_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
66{
67 return bignum_cmadd(num, (uint64_t *)rd, w, num, (uint64_t *)ad);
68}
69#endif
70
71#ifdef HAVE_BN_MUL_WORDS
72BN_ULONG
73bn_mul_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
74{
75 return bignum_cmul(num, (uint64_t *)rd, w, num, (uint64_t *)ad);
76}
77#endif
78
79#ifdef HAVE_BN_MUL_COMBA4
80void
81bn_mul_comba4(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd)
82{
83 /* XXX - consider using non-alt on CPUs that have the ADX extension. */
84 bignum_mul_4_8_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd);
85}
86#endif
87
88#ifdef HAVE_BN_MUL_COMBA8
89void
90bn_mul_comba8(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd)
91{
92 /* XXX - consider using non-alt on CPUs that have the ADX extension. */
93 bignum_mul_8_16_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd);
94}
95#endif
96
97#ifdef HAVE_BN_SQR
98int
99bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx)
100{
101 bignum_sqr(r_len, (uint64_t *)r->d, a->top, (uint64_t *)a->d);
102
103 return 1;
104}
105#endif
106
107#ifdef HAVE_BN_SQR_COMBA4
108void
109bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad)
110{
111 /* XXX - consider using non-alt on CPUs that have the ADX extension. */
112 bignum_sqr_4_8_alt((uint64_t *)rd, (uint64_t *)ad);
113}
114#endif
115
116#ifdef HAVE_BN_SQR_COMBA8
117void
118bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad)
119{
120 /* XXX - consider using non-alt on CPUs that have the ADX extension. */
121 bignum_sqr_8_16_alt((uint64_t *)rd, (uint64_t *)ad);
122}
123#endif
124
125#ifdef HAVE_BN_WORD_CLZ
126int
127bn_word_clz(BN_ULONG w)
128{
129 return word_clz(w);
130}
131#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
deleted file mode 100644
index 927cd75208..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
+++ /dev/null
@@ -1,109 +0,0 @@
1/* $OpenBSD: bn_arch.h,v 1.14 2024/03/26 06:09:25 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#ifndef HEADER_BN_ARCH_H
21#define HEADER_BN_ARCH_H
22
23#ifndef OPENSSL_NO_ASM
24
25#define HAVE_BN_ADD
26#define HAVE_BN_ADD_WORDS
27
28#define HAVE_BN_DIV_WORDS
29
30#define HAVE_BN_MUL_ADD_WORDS
31#define HAVE_BN_MUL_COMBA4
32#define HAVE_BN_MUL_COMBA8
33#define HAVE_BN_MUL_WORDS
34
35#define HAVE_BN_SQR
36#define HAVE_BN_SQR_COMBA4
37#define HAVE_BN_SQR_COMBA8
38
39#define HAVE_BN_SUB
40#define HAVE_BN_SUB_WORDS
41
42#define HAVE_BN_WORD_CLZ
43
44#if defined(__GNUC__)
45
46#define HAVE_BN_DIV_REM_WORDS_INLINE
47
/*
 * Divide the double word h:l by d, storing the quotient in *out_q and the
 * remainder in *out_r.
 *
 * NOTE(review): divq raises a divide error when d is zero or when the
 * quotient does not fit in one word (h >= d); callers presumably guarantee
 * h < d - confirm against the call sites.
 */
48static inline void
49bn_div_rem_words_inline(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q,
50 BN_ULONG *out_r)
51{
52 BN_ULONG q, r;
53
54 /*
55 * Unsigned division of %rdx:%rax by d with quotient being stored in
56 * %rax and remainder in %rdx. The constraints pin h to %rdx and l to
 * %rax on input, and read q from %rax and r from %rdx on output.
57 */
58 __asm__ volatile ("divq %4"
59 : "=a"(q), "=d"(r)
60 : "d"(h), "a"(l), "rm"(d)
61 : "cc");
62
63 *out_q = q;
64 *out_r = r;
65}
66
67#define HAVE_BN_MULW
68
/*
 * Full-width multiply: computes a * b as a double word, storing the high
 * word in *out_r1 and the low word in *out_r0.
 */
69static inline void
70bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
71{
72 BN_ULONG r1, r0;
73
74 /*
75 * Unsigned multiplication of %rax, with the double word result being
76 * stored in %rdx:%rax. The constraints place a in %rax on input, let b
 * live in a register or memory, and read r1 from %rdx and r0 from %rax.
77 */
78 __asm__ ("mulq %3"
79 : "=d"(r1), "=a"(r0)
80 : "a"(a), "rm"(b)
81 : "cc");
82
83 *out_r1 = r1;
84 *out_r0 = r0;
85}
86
87#define HAVE_BN_SUBW
88
/*
 * Single-word subtract with borrow out: stores a - b (modulo the word size)
 * in *out_r0 and 1 in *out_borrow if the subtraction borrowed, else 0.
 */
89static inline void
90bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
91{
92 BN_ULONG borrow, r0;
93
/*
 * The "1" constraint ties input a to the register of output r0, so subq
 * leaves a - b in r0. setb then writes the carry flag into the low byte
 * of borrow only, and the final and clears the remaining bits so that
 * borrow is exactly 0 or 1.
 */
94 __asm__ (
95 "subq %3, %1 \n"
96 "setb %b0 \n"
97 "and $1, %0 \n"
98 : "=r"(borrow), "=r"(r0)
99 : "1"(a), "rm"(b)
100 : "cc");
101
102 *out_borrow = borrow;
103 *out_r0 = r0;
104}
105
106#endif /* __GNUC__ */
107
108#endif
109#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S
deleted file mode 100644
index 3926fcd4b0..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S
+++ /dev/null
@@ -1,60 +0,0 @@
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Count leading zero bits in a single word
17// Input a; output function return
18//
19// extern uint64_t word_clz (uint64_t a);
20//
21// Standard x86-64 ABI: RDI = a, returns RAX
22// Microsoft x64 ABI: RCX = a, returns RAX
23// ----------------------------------------------------------------------------
24
25#include "s2n_bignum_internal.h"
26
27 .intel_syntax noprefix
28 S2N_BN_SYM_VISIBILITY_DIRECTIVE(word_clz)
29 S2N_BN_SYM_PRIVACY_DIRECTIVE(word_clz)
30 .text
31
// Entry point: returns in rax the number of leading zero bits of the word
// in rdi, with 64 returned for a zero input.
32S2N_BN_SYMBOL(word_clz):
33 _CET_ENDBR
34
// Microsoft x64 ABI only: save rdi/rsi and move the argument from rcx to rdi.
// NOTE(review): rsi is saved and restored although nothing below writes it.
35#if WINDOWS_ABI
36 push rdi
37 push rsi
38 mov rdi, rcx
39#endif
40
41// First do rax = 63 - bsr(a), which is right except (maybe) for zero inputs; for a bit index in 0..63 the xor with 63 equals that subtraction
42
43 bsr rax, rdi
44 xor rax, 63
45
46// Force return of 64 in the zero-input case: the 32-bit mov puts 64 in rdx, which replaces rax only when rdi tests as zero
47
48 mov edx, 64
49 test rdi, rdi
50 cmove rax, rdx
51
52#if WINDOWS_ABI
53 pop rsi
54 pop rdi
55#endif
56 ret
57
58#if defined(__linux__) && defined(__ELF__)
59.section .note.GNU-stack,"",%progbits
60#endif