summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author jsing <> 2025-08-11 14:11:20 +0000
committer jsing <> 2025-08-11 14:11:20 +0000
commit 28d52ec2924676a240d0477f564160bd054d5549 (patch)
tree 22e2b3da2203c9ba206a85da5dd6bf7a67c0a1e8 /src
parent 4fd25eed849a1e1cc5f0497cf86579813203b406 (diff)
download openbsd-28d52ec2924676a240d0477f564160bd054d5549.tar.gz
openbsd-28d52ec2924676a240d0477f564160bd054d5549.tar.bz2
openbsd-28d52ec2924676a240d0477f564160bd054d5549.zip
Resync s2n-bignum primitives for amd64 with upstream.
This amounts to whitespace changes and label renaming.
Diffstat (limited to 'src')
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bignum_add.S 49
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S 30
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S 26
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bignum_mul.S 23
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S 6
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S 7
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S 27
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S 6
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S 5
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bignum_sub.S 45
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/word_clz.S 4
11 files changed, 113 insertions, 115 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
index 5fe4aae7a1..5ec0e36282 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
@@ -16,9 +16,8 @@
16// Add, z := x + y 16// Add, z := x + y
17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p] 17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18// 18//
19// extern uint64_t bignum_add 19// extern uint64_t bignum_add(uint64_t p, uint64_t *z, uint64_t m,
20// (uint64_t p, uint64_t *z, 20// const uint64_t *x, uint64_t n, const uint64_t *y);
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22// 21//
23// Does the z := x + y operation, truncating modulo p words in general and 22// Does the z := x + y operation, truncating modulo p words in general and
24// returning a top carry (0 or 1) in the p'th place, only adding the input 23// returning a top carry (0 or 1) in the p'th place, only adding the input
@@ -49,7 +48,7 @@
49 48
50 49
51S2N_BN_SYMBOL(bignum_add): 50S2N_BN_SYMBOL(bignum_add):
52 _CET_ENDBR 51 _CET_ENDBR
53 52
54#if WINDOWS_ABI 53#if WINDOWS_ABI
55 push rdi 54 push rdi
@@ -75,7 +74,7 @@ S2N_BN_SYMBOL(bignum_add):
75 cmp p, n 74 cmp p, n
76 cmovc n, p 75 cmovc n, p
77 cmp m, n 76 cmp m, n
78 jc ylonger 77 jc bignum_add_ylonger
79 78
80// The case where x is longer or of the same size (p >= m >= n) 79// The case where x is longer or of the same size (p >= m >= n)
81 80
@@ -83,27 +82,27 @@ S2N_BN_SYMBOL(bignum_add):
83 sub m, n 82 sub m, n
84 inc m 83 inc m
85 test n, n 84 test n, n
86 jz xtest 85 jz bignum_add_xtest
87xmainloop: 86bignum_add_xmainloop:
88 mov a, [x+8*i] 87 mov a, [x+8*i]
89 adc a, [y+8*i] 88 adc a, [y+8*i]
90 mov [z+8*i],a 89 mov [z+8*i],a
91 inc i 90 inc i
92 dec n 91 dec n
93 jnz xmainloop 92 jnz bignum_add_xmainloop
94 jmp xtest 93 jmp bignum_add_xtest
95xtoploop: 94bignum_add_xtoploop:
96 mov a, [x+8*i] 95 mov a, [x+8*i]
97 adc a, 0 96 adc a, 0
98 mov [z+8*i],a 97 mov [z+8*i],a
99 inc i 98 inc i
100xtest: 99bignum_add_xtest:
101 dec m 100 dec m
102 jnz xtoploop 101 jnz bignum_add_xtoploop
103 mov ashort, 0 102 mov ashort, 0
104 adc a, 0 103 adc a, 0
105 test p, p 104 test p, p
106 jnz tails 105 jnz bignum_add_tails
107#if WINDOWS_ABI 106#if WINDOWS_ABI
108 pop rsi 107 pop rsi
109 pop rdi 108 pop rdi
@@ -112,30 +111,30 @@ xtest:
112 111
113// The case where y is longer (p >= n > m) 112// The case where y is longer (p >= n > m)
114 113
115ylonger: 114bignum_add_ylonger:
116 115
117 sub p, n 116 sub p, n
118 sub n, m 117 sub n, m
119 test m, m 118 test m, m
120 jz ytoploop 119 jz bignum_add_ytoploop
121ymainloop: 120bignum_add_ymainloop:
122 mov a, [x+8*i] 121 mov a, [x+8*i]
123 adc a, [y+8*i] 122 adc a, [y+8*i]
124 mov [z+8*i],a 123 mov [z+8*i],a
125 inc i 124 inc i
126 dec m 125 dec m
127 jnz ymainloop 126 jnz bignum_add_ymainloop
128ytoploop: 127bignum_add_ytoploop:
129 mov a, [y+8*i] 128 mov a, [y+8*i]
130 adc a, 0 129 adc a, 0
131 mov [z+8*i],a 130 mov [z+8*i],a
132 inc i 131 inc i
133 dec n 132 dec n
134 jnz ytoploop 133 jnz bignum_add_ytoploop
135 mov ashort, 0 134 mov ashort, 0
136 adc a, 0 135 adc a, 0
137 test p, p 136 test p, p
138 jnz tails 137 jnz bignum_add_tails
139#if WINDOWS_ABI 138#if WINDOWS_ABI
140 pop rsi 139 pop rsi
141 pop rdi 140 pop rdi
@@ -144,16 +143,16 @@ ytoploop:
144 143
145// Adding a non-trivial tail, when p > max(m,n) 144// Adding a non-trivial tail, when p > max(m,n)
146 145
147tails: 146bignum_add_tails:
148 mov [z+8*i],a 147 mov [z+8*i],a
149 xor a, a 148 xor a, a
150 jmp tail 149 jmp bignum_add_tail
151tailloop: 150bignum_add_tailloop:
152 mov [z+8*i],a 151 mov [z+8*i],a
153tail: 152bignum_add_tail:
154 inc i 153 inc i
155 dec p 154 dec p
156 jnz tailloop 155 jnz bignum_add_tailloop
157#if WINDOWS_ABI 156#if WINDOWS_ABI
158 pop rsi 157 pop rsi
159 pop rdi 158 pop rdi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
index 25ba17bce2..ebbacec344 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
@@ -16,8 +16,8 @@
16// Multiply-add with single-word multiplier, z := z + c * y 16// Multiply-add with single-word multiplier, z := z + c * y
17// Inputs c, y[n]; outputs function return (carry-out) and z[k] 17// Inputs c, y[n]; outputs function return (carry-out) and z[k]
18// 18//
19// extern uint64_t bignum_cmadd 19// extern uint64_t bignum_cmadd(uint64_t k, uint64_t *z, uint64_t c, uint64_t n,
20// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); 20// const uint64_t *y);
21// 21//
22// Does the "z := z + c * y" operation where y is n digits, result z is p. 22// Does the "z := z + c * y" operation where y is n digits, result z is p.
23// Truncates the result in general. 23// Truncates the result in general.
@@ -54,7 +54,7 @@
54 54
55 55
56S2N_BN_SYMBOL(bignum_cmadd): 56S2N_BN_SYMBOL(bignum_cmadd):
57 _CET_ENDBR 57 _CET_ENDBR
58 58
59#if WINDOWS_ABI 59#if WINDOWS_ABI
60 push rdi 60 push rdi
@@ -82,7 +82,7 @@ S2N_BN_SYMBOL(bignum_cmadd):
82 82
83 xor h, h 83 xor h, h
84 test n, n 84 test n, n
85 jz end 85 jz bignum_cmadd_end
86 86
87// Move c into a safer register as multiplies overwrite rdx 87// Move c into a safer register as multiplies overwrite rdx
88 88
@@ -96,11 +96,11 @@ S2N_BN_SYMBOL(bignum_cmadd):
96 mov h, rdx 96 mov h, rdx
97 mov ishort, 1 97 mov ishort, 1
98 dec n 98 dec n
99 jz hightail 99 jz bignum_cmadd_hightail
100 100
101// Main loop, where we always have CF + previous high part h to add in 101// Main loop, where we always have CF + previous high part h to add in
102 102
103loop: 103bignum_cmadd_loop:
104 adc h, [z+8*i] 104 adc h, [z+8*i]
105 sbb r, r 105 sbb r, r
106 mov rax, [x+8*i] 106 mov rax, [x+8*i]
@@ -111,36 +111,36 @@ loop:
111 mov h, rdx 111 mov h, rdx
112 inc i 112 inc i
113 dec n 113 dec n
114 jnz loop 114 jnz bignum_cmadd_loop
115 115
116hightail: 116bignum_cmadd_hightail:
117 adc h, 0 117 adc h, 0
118 118
119// Propagate the carry all the way to the end with h as extra carry word 119// Propagate the carry all the way to the end with h as extra carry word
120 120
121tail: 121bignum_cmadd_tail:
122 test p, p 122 test p, p
123 jz end 123 jz bignum_cmadd_end
124 124
125 add [z+8*i], h 125 add [z+8*i], h
126 mov hshort, 0 126 mov hshort, 0
127 inc i 127 inc i
128 dec p 128 dec p
129 jz highend 129 jz bignum_cmadd_highend
130 130
131tloop: 131bignum_cmadd_tloop:
132 adc [z+8*i], h 132 adc [z+8*i], h
133 inc i 133 inc i
134 dec p 134 dec p
135 jnz tloop 135 jnz bignum_cmadd_tloop
136 136
137highend: 137bignum_cmadd_highend:
138 138
139 adc h, 0 139 adc h, 0
140 140
141// Return the high/carry word 141// Return the high/carry word
142 142
143end: 143bignum_cmadd_end:
144 mov rax, h 144 mov rax, h
145 145
146 pop rbx 146 pop rbx
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
index 12f785d63a..3e28e37535 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
@@ -16,8 +16,8 @@
16// Multiply by a single word, z := c * y 16// Multiply by a single word, z := c * y
17// Inputs c, y[n]; outputs function return (carry-out) and z[k] 17// Inputs c, y[n]; outputs function return (carry-out) and z[k]
18// 18//
19// extern uint64_t bignum_cmul 19// extern uint64_t bignum_cmul(uint64_t k, uint64_t *z, uint64_t c, uint64_t n,
20// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); 20// const uint64_t *y);
21// 21//
22// Does the "z := c * y" operation where y is n digits, result z is p. 22// Does the "z := c * y" operation where y is n digits, result z is p.
23// Truncates the result in general unless p >= n + 1. 23// Truncates the result in general unless p >= n + 1.
@@ -51,7 +51,7 @@
51 51
52 52
53S2N_BN_SYMBOL(bignum_cmul): 53S2N_BN_SYMBOL(bignum_cmul):
54 _CET_ENDBR 54 _CET_ENDBR
55 55
56#if WINDOWS_ABI 56#if WINDOWS_ABI
57 push rdi 57 push rdi
@@ -76,7 +76,7 @@ S2N_BN_SYMBOL(bignum_cmul):
76 xor h, h 76 xor h, h
77 xor i, i 77 xor i, i
78 test n, n 78 test n, n
79 jz tail 79 jz bignum_cmul_tail
80 80
81// Move c into a safer register as multiplies overwrite rdx 81// Move c into a safer register as multiplies overwrite rdx
82 82
@@ -90,11 +90,11 @@ S2N_BN_SYMBOL(bignum_cmul):
90 mov h, rdx 90 mov h, rdx
91 inc i 91 inc i
92 cmp i, n 92 cmp i, n
93 jz tail 93 jz bignum_cmul_tail
94 94
95// Main loop doing the multiplications 95// Main loop doing the multiplications
96 96
97loop: 97bignum_cmul_loop:
98 mov rax, [x+8*i] 98 mov rax, [x+8*i]
99 mul c 99 mul c
100 add rax, h 100 add rax, h
@@ -103,28 +103,28 @@ loop:
103 mov h, rdx 103 mov h, rdx
104 inc i 104 inc i
105 cmp i, n 105 cmp i, n
106 jc loop 106 jc bignum_cmul_loop
107 107
108// Add a tail when the destination is longer 108// Add a tail when the destination is longer
109 109
110tail: 110bignum_cmul_tail:
111 cmp i, p 111 cmp i, p
112 jnc end 112 jnc bignum_cmul_end
113 mov [z+8*i], h 113 mov [z+8*i], h
114 xor h, h 114 xor h, h
115 inc i 115 inc i
116 cmp i, p 116 cmp i, p
117 jnc end 117 jnc bignum_cmul_end
118 118
119tloop: 119bignum_cmul_tloop:
120 mov [z+8*i], h 120 mov [z+8*i], h
121 inc i 121 inc i
122 cmp i, p 122 cmp i, p
123 jc tloop 123 jc bignum_cmul_tloop
124 124
125// Return the high/carry word 125// Return the high/carry word
126 126
127end: 127bignum_cmul_end:
128 mov rax, h 128 mov rax, h
129 129
130#if WINDOWS_ABI 130#if WINDOWS_ABI
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
index a3552679a2..3bc09de30a 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
@@ -16,9 +16,8 @@
16// Multiply z := x * y 16// Multiply z := x * y
17// Inputs x[m], y[n]; output z[k] 17// Inputs x[m], y[n]; output z[k]
18// 18//
19// extern void bignum_mul 19// extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x,
20// (uint64_t k, uint64_t *z, 20// uint64_t n, const uint64_t *y);
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22// 21//
23// Does the "z := x * y" operation where x is m digits, y is n, result z is k. 22// Does the "z := x * y" operation where x is m digits, y is n, result z is k.
24// Truncates the result in general unless k >= m + n 23// Truncates the result in general unless k >= m + n
@@ -59,7 +58,7 @@
59 58
60 59
61S2N_BN_SYMBOL(bignum_mul): 60S2N_BN_SYMBOL(bignum_mul):
62 _CET_ENDBR 61 _CET_ENDBR
63 62
64#if WINDOWS_ABI 63#if WINDOWS_ABI
65 push rdi 64 push rdi
@@ -88,7 +87,7 @@ S2N_BN_SYMBOL(bignum_mul):
88// If we did a multiply-add variant, however, then we could 87// If we did a multiply-add variant, however, then we could
89 88
90 test p, p 89 test p, p
91 jz end 90 jz bignum_mul_end
92 91
93// Set initial 2-part sum to zero (we zero c inside the body) 92// Set initial 2-part sum to zero (we zero c inside the body)
94 93
@@ -99,7 +98,7 @@ S2N_BN_SYMBOL(bignum_mul):
99 98
100 xor k, k 99 xor k, k
101 100
102outerloop: 101bignum_mul_outerloop:
103 102
104// Zero our carry term first; we eventually want it and a zero is useful now 103// Zero our carry term first; we eventually want it and a zero is useful now
105// Set a = max 0 (k + 1 - n), i = min (k + 1) m 104// Set a = max 0 (k + 1 - n), i = min (k + 1) m
@@ -125,11 +124,11 @@ outerloop:
125 mov d, k 124 mov d, k
126 sub d, i 125 sub d, i
127 sub i, a 126 sub i, a
128 jbe innerend 127 jbe bignum_mul_innerend
129 lea x,[rcx+8*a] 128 lea x,[rcx+8*a]
130 lea y,[r9+8*d-8] 129 lea y,[r9+8*d-8]
131 130
132innerloop: 131bignum_mul_innerloop:
133 mov rax, [y+8*i] 132 mov rax, [y+8*i]
134 mul QWORD PTR [x] 133 mul QWORD PTR [x]
135 add x, 8 134 add x, 8
@@ -137,9 +136,9 @@ innerloop:
137 adc h, rdx 136 adc h, rdx
138 adc c, 0 137 adc c, 0
139 dec i 138 dec i
140 jnz innerloop 139 jnz bignum_mul_innerloop
141 140
142innerend: 141bignum_mul_innerend:
143 142
144 mov [z], l 143 mov [z], l
145 mov l, h 144 mov l, h
@@ -147,9 +146,9 @@ innerend:
147 add z, 8 146 add z, 8
148 147
149 cmp k, p 148 cmp k, p
150 jc outerloop 149 jc bignum_mul_outerloop
151 150
152end: 151bignum_mul_end:
153 pop r15 152 pop r15
154 pop r14 153 pop r14
155 pop r13 154 pop r13
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
index 70ff69e372..5e04bcc009 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
@@ -16,8 +16,8 @@
16// Multiply z := x * y 16// Multiply z := x * y
17// Inputs x[4], y[4]; output z[8] 17// Inputs x[4], y[4]; output z[8]
18// 18//
19// extern void bignum_mul_4_8_alt 19// extern void bignum_mul_4_8_alt(uint64_t z[static 8], const uint64_t x[static 4],
20// (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); 20// const uint64_t y[static 4]);
21// 21//
22// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y 22// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
23// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y 23// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
@@ -72,7 +72,7 @@
72 adc h, rdx 72 adc h, rdx
73 73
74S2N_BN_SYMBOL(bignum_mul_4_8_alt): 74S2N_BN_SYMBOL(bignum_mul_4_8_alt):
75 _CET_ENDBR 75 _CET_ENDBR
76 76
77#if WINDOWS_ABI 77#if WINDOWS_ABI
78 push rdi 78 push rdi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
index 066403b074..4d54168c90 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
@@ -16,8 +16,9 @@
16// Multiply z := x * y 16// Multiply z := x * y
17// Inputs x[8], y[8]; output z[16] 17// Inputs x[8], y[8]; output z[16]
18// 18//
19// extern void bignum_mul_8_16_alt 19// extern void bignum_mul_8_16_alt(uint64_t z[static 16],
20// (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); 20// const uint64_t x[static 8],
21// const uint64_t y[static 8]);
21// 22//
22// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y 23// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
23// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y 24// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
@@ -72,7 +73,7 @@
72 adc h, rdx 73 adc h, rdx
73 74
74S2N_BN_SYMBOL(bignum_mul_8_16_alt): 75S2N_BN_SYMBOL(bignum_mul_8_16_alt):
75 _CET_ENDBR 76 _CET_ENDBR
76 77
77#if WINDOWS_ABI 78#if WINDOWS_ABI
78 push rdi 79 push rdi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
index 54e3f59442..48cc182b72 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
@@ -16,8 +16,7 @@
16// Square z := x^2 16// Square z := x^2
17// Input x[n]; output z[k] 17// Input x[n]; output z[k]
18// 18//
19// extern void bignum_sqr 19// extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x);
20// (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x);
21// 20//
22// Does the "z := x^2" operation where x is n digits and result z is k. 21// Does the "z := x^2" operation where x is n digits and result z is k.
23// Truncates the result in general unless k >= 2 * n 22// Truncates the result in general unless k >= 2 * n
@@ -62,7 +61,7 @@
62#define llshort ebp 61#define llshort ebp
63 62
64S2N_BN_SYMBOL(bignum_sqr): 63S2N_BN_SYMBOL(bignum_sqr):
65 _CET_ENDBR 64 _CET_ENDBR
66 65
67#if WINDOWS_ABI 66#if WINDOWS_ABI
68 push rdi 67 push rdi
@@ -86,7 +85,7 @@ S2N_BN_SYMBOL(bignum_sqr):
86// If p = 0 the result is trivial and nothing needs doing 85// If p = 0 the result is trivial and nothing needs doing
87 86
88 test p, p 87 test p, p
89 jz end 88 jz bignum_sqr_end
90 89
91// initialize (hh,ll) = 0 90// initialize (hh,ll) = 0
92 91
@@ -97,7 +96,7 @@ S2N_BN_SYMBOL(bignum_sqr):
97 96
98 xor k, k 97 xor k, k
99 98
100outerloop: 99bignum_sqr_outerloop:
101 100
102// First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n 101// First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n
103// We want to accumulate all x[i] * x[k - i] for bot <= i < top 102// We want to accumulate all x[i] * x[k - i] for bot <= i < top
@@ -122,7 +121,7 @@ outerloop:
122// If htop <= bot then main doubled part of the sum is empty 121// If htop <= bot then main doubled part of the sum is empty
123 122
124 cmp i, htop 123 cmp i, htop
125 jnc nosumming 124 jnc bignum_sqr_nosumming
126 125
127// Use a moving pointer for [y] = x[k-i] for the cofactor 126// Use a moving pointer for [y] = x[k-i] for the cofactor
128 127
@@ -132,7 +131,7 @@ outerloop:
132 131
133// Do the main part of the sum x[i] * x[k - i] for 2 * i < k 132// Do the main part of the sum x[i] * x[k - i] for 2 * i < k
134 133
135innerloop: 134bignum_sqr_innerloop:
136 mov a, [x+8*i] 135 mov a, [x+8*i]
137 mul QWORD PTR [y] 136 mul QWORD PTR [y]
138 add l, a 137 add l, a
@@ -141,7 +140,7 @@ innerloop:
141 sub y, 8 140 sub y, 8
142 inc i 141 inc i
143 cmp i, htop 142 cmp i, htop
144 jc innerloop 143 jc bignum_sqr_innerloop
145 144
146// Now double it 145// Now double it
147 146
@@ -151,11 +150,11 @@ innerloop:
151 150
152// If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term 151// If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term
153 152
154nosumming: 153bignum_sqr_nosumming:
155 test k, 1 154 test k, 1
156 jnz innerend 155 jnz bignum_sqr_innerend
157 cmp i, n 156 cmp i, n
158 jnc innerend 157 jnc bignum_sqr_innerend
159 158
160 mov a, [x+8*i] 159 mov a, [x+8*i]
161 mul a 160 mul a
@@ -165,7 +164,7 @@ nosumming:
165 164
166// Now add the local sum into the global sum, store and shift 165// Now add the local sum into the global sum, store and shift
167 166
168innerend: 167bignum_sqr_innerend:
169 add l, ll 168 add l, ll
170 mov [z+8*k], l 169 mov [z+8*k], l
171 adc h, hh 170 adc h, hh
@@ -175,11 +174,11 @@ innerend:
175 174
176 inc k 175 inc k
177 cmp k, p 176 cmp k, p
178 jc outerloop 177 jc bignum_sqr_outerloop
179 178
180// Restore registers and return 179// Restore registers and return
181 180
182end: 181bignum_sqr_end:
183 pop r15 182 pop r15
184 pop r14 183 pop r14
185 pop r13 184 pop r13
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
index 7c534ae907..cb0eec0eea 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
@@ -16,8 +16,8 @@
16// Square, z := x^2 16// Square, z := x^2
17// Input x[4]; output z[8] 17// Input x[4]; output z[8]
18// 18//
19// extern void bignum_sqr_4_8_alt 19// extern void bignum_sqr_4_8_alt(uint64_t z[static 8],
20// (uint64_t z[static 8], uint64_t x[static 4]); 20// const uint64_t x[static 4]);
21// 21//
22// Standard x86-64 ABI: RDI = z, RSI = x 22// Standard x86-64 ABI: RDI = z, RSI = x
23// Microsoft x64 ABI: RCX = z, RDX = x 23// Microsoft x64 ABI: RCX = z, RDX = x
@@ -71,7 +71,7 @@
71 adc c, 0 71 adc c, 0
72 72
73S2N_BN_SYMBOL(bignum_sqr_4_8_alt): 73S2N_BN_SYMBOL(bignum_sqr_4_8_alt):
74 _CET_ENDBR 74 _CET_ENDBR
75 75
76#if WINDOWS_ABI 76#if WINDOWS_ABI
77 push rdi 77 push rdi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
index ac0b6f96c2..04577d56cf 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
@@ -16,7 +16,8 @@
16// Square, z := x^2 16// Square, z := x^2
17// Input x[8]; output z[16] 17// Input x[8]; output z[16]
18// 18//
19// extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); 19// extern void bignum_sqr_8_16_alt(uint64_t z[static 16],
20// const uint64_t x[static 8]);
20// 21//
21// Standard x86-64 ABI: RDI = z, RSI = x 22// Standard x86-64 ABI: RDI = z, RSI = x
22// Microsoft x64 ABI: RCX = z, RDX = x 23// Microsoft x64 ABI: RCX = z, RDX = x
@@ -103,7 +104,7 @@
103 adc c, 0 104 adc c, 0
104 105
105S2N_BN_SYMBOL(bignum_sqr_8_16_alt): 106S2N_BN_SYMBOL(bignum_sqr_8_16_alt):
106 _CET_ENDBR 107 _CET_ENDBR
107 108
108#if WINDOWS_ABI 109#if WINDOWS_ABI
109 push rdi 110 push rdi
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
index 3ff8a30510..a18e86ba7c 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
+++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
@@ -16,9 +16,8 @@
16// Subtract, z := x - y 16// Subtract, z := x - y
17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p] 17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18// 18//
19// extern uint64_t bignum_sub 19// extern uint64_t bignum_sub(uint64_t p, uint64_t *z, uint64_t m,
20// (uint64_t p, uint64_t *z, 20// const uint64_t *x, uint64_t n, const uint64_t *y);
21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22// 21//
23// Does the z := x - y operation, truncating modulo p words in general and 22// Does the z := x - y operation, truncating modulo p words in general and
24// returning a top borrow (0 or 1) in the p'th place, only subtracting input 23// returning a top borrow (0 or 1) in the p'th place, only subtracting input
@@ -49,7 +48,7 @@
49 48
50 49
51S2N_BN_SYMBOL(bignum_sub): 50S2N_BN_SYMBOL(bignum_sub):
52 _CET_ENDBR 51 _CET_ENDBR
53 52
54#if WINDOWS_ABI 53#if WINDOWS_ABI
55 push rdi 54 push rdi
@@ -75,7 +74,7 @@ S2N_BN_SYMBOL(bignum_sub):
75 cmp p, n 74 cmp p, n
76 cmovc n, p 75 cmovc n, p
77 cmp m, n 76 cmp m, n
78 jc ylonger 77 jc bignum_sub_ylonger
79 78
80// The case where x is longer or of the same size (p >= m >= n) 79// The case where x is longer or of the same size (p >= m >= n)
81 80
@@ -83,32 +82,32 @@ S2N_BN_SYMBOL(bignum_sub):
83 sub m, n 82 sub m, n
84 inc m 83 inc m
85 test n, n 84 test n, n
86 jz xtest 85 jz bignum_sub_xtest
87xmainloop: 86bignum_sub_xmainloop:
88 mov a, [x+8*i] 87 mov a, [x+8*i]
89 sbb a, [y+8*i] 88 sbb a, [y+8*i]
90 mov [z+8*i],a 89 mov [z+8*i],a
91 inc i 90 inc i
92 dec n 91 dec n
93 jnz xmainloop 92 jnz bignum_sub_xmainloop
94 jmp xtest 93 jmp bignum_sub_xtest
95xtoploop: 94bignum_sub_xtoploop:
96 mov a, [x+8*i] 95 mov a, [x+8*i]
97 sbb a, 0 96 sbb a, 0
98 mov [z+8*i],a 97 mov [z+8*i],a
99 inc i 98 inc i
100xtest: 99bignum_sub_xtest:
101 dec m 100 dec m
102 jnz xtoploop 101 jnz bignum_sub_xtoploop
103 sbb a, a 102 sbb a, a
104 test p, p 103 test p, p
105 jz tailskip 104 jz bignum_sub_tailskip
106tailloop: 105bignum_sub_tailloop:
107 mov [z+8*i],a 106 mov [z+8*i],a
108 inc i 107 inc i
109 dec p 108 dec p
110 jnz tailloop 109 jnz bignum_sub_tailloop
111tailskip: 110bignum_sub_tailskip:
112 neg a 111 neg a
113#if WINDOWS_ABI 112#if WINDOWS_ABI
114 pop rsi 113 pop rsi
@@ -118,29 +117,29 @@ tailskip:
118 117
119// The case where y is longer (p >= n > m) 118// The case where y is longer (p >= n > m)
120 119
121ylonger: 120bignum_sub_ylonger:
122 121
123 sub p, n 122 sub p, n
124 sub n, m 123 sub n, m
125 test m, m 124 test m, m
126 jz ytoploop 125 jz bignum_sub_ytoploop
127ymainloop: 126bignum_sub_ymainloop:
128 mov a, [x+8*i] 127 mov a, [x+8*i]
129 sbb a, [y+8*i] 128 sbb a, [y+8*i]
130 mov [z+8*i],a 129 mov [z+8*i],a
131 inc i 130 inc i
132 dec m 131 dec m
133 jnz ymainloop 132 jnz bignum_sub_ymainloop
134ytoploop: 133bignum_sub_ytoploop:
135 mov ashort, 0 134 mov ashort, 0
136 sbb a, [y+8*i] 135 sbb a, [y+8*i]
137 mov [z+8*i],a 136 mov [z+8*i],a
138 inc i 137 inc i
139 dec n 138 dec n
140 jnz ytoploop 139 jnz bignum_sub_ytoploop
141 sbb a, a 140 sbb a, a
142 test p, p 141 test p, p
143 jnz tailloop 142 jnz bignum_sub_tailloop
144 neg a 143 neg a
145#if WINDOWS_ABI 144#if WINDOWS_ABI
146 pop rsi 145 pop rsi
diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S
index 3926fcd4b0..84c9c8275d 100644
--- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S
+++ b/src/lib/libcrypto/bn/arch/amd64/word_clz.S
@@ -16,7 +16,7 @@
16// Count leading zero bits in a single word 16// Count leading zero bits in a single word
17// Input a; output function return 17// Input a; output function return
18// 18//
19// extern uint64_t word_clz (uint64_t a); 19// extern uint64_t word_clz(uint64_t a);
20// 20//
21// Standard x86-64 ABI: RDI = a, returns RAX 21// Standard x86-64 ABI: RDI = a, returns RAX
22// Microsoft x64 ABI: RCX = a, returns RAX 22// Microsoft x64 ABI: RCX = a, returns RAX
@@ -30,7 +30,7 @@
30 .text 30 .text
31 31
32S2N_BN_SYMBOL(word_clz): 32S2N_BN_SYMBOL(word_clz):
33 _CET_ENDBR 33 _CET_ENDBR
34 34
35#if WINDOWS_ABI 35#if WINDOWS_ABI
36 push rdi 36 push rdi