summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorjsing <>2026-01-25 08:22:17 +0000
committerjsing <>2026-01-25 08:22:17 +0000
commitc6ef8fc8d3285a55c002f52df2bb2df42b7734c0 (patch)
tree541d7b395ea34307cec81109c20f473cbc5ea8fb /src
parent1d4a03f3d650a577581aedee8cc799a3ad6668d8 (diff)
downloadopenbsd-c6ef8fc8d3285a55c002f52df2bb2df42b7734c0.tar.gz
openbsd-c6ef8fc8d3285a55c002f52df2bb2df42b7734c0.tar.bz2
openbsd-c6ef8fc8d3285a55c002f52df2bb2df42b7734c0.zip
Make SHA aarch64 assembly build with gcc.
gcc is extremely fussy about register naming and insists on q and s naming for the ARM CE SHA instructions, even though they're referring to the same register (while LLVM just figures it out). Work around this by mapping registers to their required variant at the point of use and defining a handful of mappings between v registers and alternate names/views. This is still somewhat ugly, but seems to be one of the cleaner options that will allow portable to enable SHA assembly on platforms that use gcc. ok kenjiro@ tb@
Diffstat (limited to 'src')
-rw-r--r--src/lib/libcrypto/sha/sha1_aarch64_ce.S61
-rw-r--r--src/lib/libcrypto/sha/sha256_aarch64_ce.S27
-rw-r--r--src/lib/libcrypto/sha/sha512_aarch64_ce.S39
3 files changed, 72 insertions, 55 deletions
diff --git a/src/lib/libcrypto/sha/sha1_aarch64_ce.S b/src/lib/libcrypto/sha/sha1_aarch64_ce.S
index 853d467641..641500a1e5 100644
--- a/src/lib/libcrypto/sha/sha1_aarch64_ce.S
+++ b/src/lib/libcrypto/sha/sha1_aarch64_ce.S
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha1_aarch64_ce.S,v 1.4 2026/01/24 14:20:52 jsing Exp $ */ 1/* $OpenBSD: sha1_aarch64_ce.S,v 1.5 2026/01/25 08:22:17 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -40,6 +40,7 @@
40#define hc1s s17 40#define hc1s s17
41 41
42#define hs0 v18 42#define hs0 v18
43#define hs0q q18
43#define hs1 v19 44#define hs1 v19
44#define hs1s s19 45#define hs1s s19
45 46
@@ -76,26 +77,26 @@
76#define sha1_round1(h0, h1, w, k) \ 77#define sha1_round1(h0, h1, w, k) \
77 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ 78 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \
78 mov tmp1, h0.s[0]; \ 79 mov tmp1, h0.s[0]; \
79 sha1c h0, h1, tmp0.4s; \ 80 sha1c h0##q, h1##s, tmp0.4s; \
80 sha1h h1, tmp1 81 sha1h h1##s, tmp1
81 82
82#define sha1_round2(h0, h1, w, k) \ 83#define sha1_round2(h0, h1, w, k) \
83 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ 84 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \
84 mov tmp1, h0.s[0]; \ 85 mov tmp1, h0.s[0]; \
85 sha1p h0, h1, tmp0.4s; \ 86 sha1p h0##q, h1##s, tmp0.4s; \
86 sha1h h1, tmp1 87 sha1h h1##s, tmp1
87 88
88#define sha1_round3(h0, h1, w, k) \ 89#define sha1_round3(h0, h1, w, k) \
89 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ 90 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \
90 mov tmp1, h0.s[0]; \ 91 mov tmp1, h0.s[0]; \
91 sha1m h0, h1, tmp0.4s; \ 92 sha1m h0##q, h1##s, tmp0.4s; \
92 sha1h h1, tmp1 93 sha1h h1##s, tmp1
93 94
94#define sha1_round4(h0, h1, w, k) \ 95#define sha1_round4(h0, h1, w, k) \
95 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ 96 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \
96 mov tmp1, h0.s[0]; \ 97 mov tmp1, h0.s[0]; \
97 sha1p h0, h1, tmp0.4s; \ 98 sha1p h0##q, h1##s, tmp0.4s; \
98 sha1h h1, tmp1 99 sha1h h1##s, tmp1
99 100
100.arch armv8-a+sha2 101.arch armv8-a+sha2
101 102
@@ -140,7 +141,7 @@ sha1_block_ce:
140 141
141.Lblock_loop: 142.Lblock_loop:
142 /* Copy current hash state. */ 143 /* Copy current hash state. */
143 mov hs0.4s, hc0.4s 144 mov hs0.16b, hc0.16b
144 mov hs1s, hc1.s[0] 145 mov hs1s, hc1.s[0]
145 146
146 /* Load and byte swap message schedule. */ 147 /* Load and byte swap message schedule. */
@@ -151,10 +152,10 @@ sha1_block_ce:
151 rev32 w3.16b, w3.16b 152 rev32 w3.16b, w3.16b
152 153
153 /* Rounds 0 through 15 (four rounds at a time). */ 154 /* Rounds 0 through 15 (four rounds at a time). */
154 sha1_round1(hs0, hs1s, w0, k0) 155 sha1_round1(hs0, hs1, w0, k0)
155 sha1_round1(hs0, hs1s, w1, k0) 156 sha1_round1(hs0, hs1, w1, k0)
156 sha1_round1(hs0, hs1s, w2, k0) 157 sha1_round1(hs0, hs1, w2, k0)
157 sha1_round1(hs0, hs1s, w3, k0) 158 sha1_round1(hs0, hs1, w3, k0)
158 159
159 /* Rounds 16 through 31 (four rounds at a time). */ 160 /* Rounds 16 through 31 (four rounds at a time). */
160 sha1_message_schedule_update(w0, w1, w2, w3) 161 sha1_message_schedule_update(w0, w1, w2, w3)
@@ -162,10 +163,10 @@ sha1_block_ce:
162 sha1_message_schedule_update(w2, w3, w0, w1) 163 sha1_message_schedule_update(w2, w3, w0, w1)
163 sha1_message_schedule_update(w3, w0, w1, w2) 164 sha1_message_schedule_update(w3, w0, w1, w2)
164 165
165 sha1_round1(hs0, hs1s, w0, k0) 166 sha1_round1(hs0, hs1, w0, k0)
166 sha1_round2(hs0, hs1s, w1, k1) 167 sha1_round2(hs0, hs1, w1, k1)
167 sha1_round2(hs0, hs1s, w2, k1) 168 sha1_round2(hs0, hs1, w2, k1)
168 sha1_round2(hs0, hs1s, w3, k1) 169 sha1_round2(hs0, hs1, w3, k1)
169 170
170 /* Rounds 32 through 47 (four rounds at a time). */ 171 /* Rounds 32 through 47 (four rounds at a time). */
171 sha1_message_schedule_update(w0, w1, w2, w3) 172 sha1_message_schedule_update(w0, w1, w2, w3)
@@ -173,10 +174,10 @@ sha1_block_ce:
173 sha1_message_schedule_update(w2, w3, w0, w1) 174 sha1_message_schedule_update(w2, w3, w0, w1)
174 sha1_message_schedule_update(w3, w0, w1, w2) 175 sha1_message_schedule_update(w3, w0, w1, w2)
175 176
176 sha1_round2(hs0, hs1s, w0, k1) 177 sha1_round2(hs0, hs1, w0, k1)
177 sha1_round2(hs0, hs1s, w1, k1) 178 sha1_round2(hs0, hs1, w1, k1)
178 sha1_round3(hs0, hs1s, w2, k2) 179 sha1_round3(hs0, hs1, w2, k2)
179 sha1_round3(hs0, hs1s, w3, k2) 180 sha1_round3(hs0, hs1, w3, k2)
180 181
181 /* Rounds 48 through 63 (four rounds at a time). */ 182 /* Rounds 48 through 63 (four rounds at a time). */
182 sha1_message_schedule_update(w0, w1, w2, w3) 183 sha1_message_schedule_update(w0, w1, w2, w3)
@@ -184,10 +185,10 @@ sha1_block_ce:
184 sha1_message_schedule_update(w2, w3, w0, w1) 185 sha1_message_schedule_update(w2, w3, w0, w1)
185 sha1_message_schedule_update(w3, w0, w1, w2) 186 sha1_message_schedule_update(w3, w0, w1, w2)
186 187
187 sha1_round3(hs0, hs1s, w0, k2) 188 sha1_round3(hs0, hs1, w0, k2)
188 sha1_round3(hs0, hs1s, w1, k2) 189 sha1_round3(hs0, hs1, w1, k2)
189 sha1_round3(hs0, hs1s, w2, k2) 190 sha1_round3(hs0, hs1, w2, k2)
190 sha1_round4(hs0, hs1s, w3, k3) 191 sha1_round4(hs0, hs1, w3, k3)
191 192
192 /* Rounds 64 through 79 (four rounds at a time). */ 193 /* Rounds 64 through 79 (four rounds at a time). */
193 sha1_message_schedule_update(w0, w1, w2, w3) 194 sha1_message_schedule_update(w0, w1, w2, w3)
@@ -195,10 +196,10 @@ sha1_block_ce:
195 sha1_message_schedule_update(w2, w3, w0, w1) 196 sha1_message_schedule_update(w2, w3, w0, w1)
196 sha1_message_schedule_update(w3, w0, w1, w2) 197 sha1_message_schedule_update(w3, w0, w1, w2)
197 198
198 sha1_round4(hs0, hs1s, w0, k3) 199 sha1_round4(hs0, hs1, w0, k3)
199 sha1_round4(hs0, hs1s, w1, k3) 200 sha1_round4(hs0, hs1, w1, k3)
200 sha1_round4(hs0, hs1s, w2, k3) 201 sha1_round4(hs0, hs1, w2, k3)
201 sha1_round4(hs0, hs1s, w3, k3) 202 sha1_round4(hs0, hs1, w3, k3)
202 203
203 /* Add intermediate state to hash state. */ 204 /* Add intermediate state to hash state. */
204 add hc0.4s, hc0.4s, hs0.4s 205 add hc0.4s, hc0.4s, hs0.4s
diff --git a/src/lib/libcrypto/sha/sha256_aarch64_ce.S b/src/lib/libcrypto/sha/sha256_aarch64_ce.S
index 343f338390..8a26f91b06 100644
--- a/src/lib/libcrypto/sha/sha256_aarch64_ce.S
+++ b/src/lib/libcrypto/sha/sha256_aarch64_ce.S
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha256_aarch64_ce.S,v 1.5 2026/01/24 14:20:52 jsing Exp $ */ 1/* $OpenBSD: sha256_aarch64_ce.S,v 1.6 2026/01/25 08:22:17 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -54,6 +54,10 @@
54 54
55#define tmp0 v28 55#define tmp0 v28
56#define tmp1 v29 56#define tmp1 v29
57#define tmp1q q29
58
59#define v18q q18
60#define v19q q19
57 61
58/* 62/*
59 * Update message schedule for m0 (W0:W1:W2:W3), using m1 (W4:W5:W6:W7), 63 * Update message schedule for m0 (W0:W1:W2:W3), using m1 (W4:W5:W6:W7),
@@ -74,9 +78,12 @@
74 */ 78 */
75#define sha256_round(h0, h1, w, k) \ 79#define sha256_round(h0, h1, w, k) \
76 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ 80 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \
77 mov tmp1.4s, h0.4s; \ 81 mov tmp1.16b, h0.16b; \
78 sha256h h0, h1, tmp0.4s; \ 82 sha256h h0##q, h1##q, tmp0.4s; \
79 sha256h2 h1, tmp1, tmp0.4s 83 sha256h2 h1##q, tmp1##q, tmp0.4s
84
85#define sha256_round_initial(h0, h1, w, k) \
86 sha256_round(h0, h1, w, k)
80 87
81#define sha256_round_update(h0, h1, m0, m1, m2, m3, k) \ 88#define sha256_round_update(h0, h1, m0, m1, m2, m3, k) \
82 sha256_message_schedule_update(m0, m1, m2, m3); \ 89 sha256_message_schedule_update(m0, m1, m2, m3); \
@@ -109,8 +116,8 @@ sha256_block_ce:
109 mov k256, k256_base 116 mov k256, k256_base
110 117
111 /* Copy current hash state. */ 118 /* Copy current hash state. */
112 mov hs0.4s, hc0.4s 119 mov hs0.16b, hc0.16b
113 mov hs1.4s, hc1.4s 120 mov hs1.16b, hc1.16b
114 121
115 /* Load and byte swap message schedule. */ 122 /* Load and byte swap message schedule. */
116 ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64 123 ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64
@@ -122,10 +129,10 @@ sha256_block_ce:
122 /* Rounds 0 through 15 (four rounds at a time). */ 129 /* Rounds 0 through 15 (four rounds at a time). */
123 ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64 130 ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64
124 131
125 sha256_round(hs0, hs1, w0, k0) 132 sha256_round_initial(hs0, hs1, w0, k0)
126 sha256_round(hs0, hs1, w1, k1) 133 sha256_round_initial(hs0, hs1, w1, k1)
127 sha256_round(hs0, hs1, w2, k2) 134 sha256_round_initial(hs0, hs1, w2, k2)
128 sha256_round(hs0, hs1, w3, k3) 135 sha256_round_initial(hs0, hs1, w3, k3)
129 136
130 /* Rounds 16 through 31 (four rounds at a time). */ 137 /* Rounds 16 through 31 (four rounds at a time). */
131 ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64 138 ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64
diff --git a/src/lib/libcrypto/sha/sha512_aarch64_ce.S b/src/lib/libcrypto/sha/sha512_aarch64_ce.S
index bec56a49e5..6efe775ff5 100644
--- a/src/lib/libcrypto/sha/sha512_aarch64_ce.S
+++ b/src/lib/libcrypto/sha/sha512_aarch64_ce.S
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha512_aarch64_ce.S,v 1.3 2026/01/17 06:31:45 jsing Exp $ */ 1/* $OpenBSD: sha512_aarch64_ce.S,v 1.4 2026/01/25 08:22:17 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -83,9 +83,15 @@
83#define k7 v27 83#define k7 v27
84 84
85#define tmp0 v8 85#define tmp0 v8
86#define tmp0q q8
86#define tmp1 v9 87#define tmp1 v9
87#define tmp2 v18 88#define tmp2 v18
88 89
90#define v0q q0
91#define v1q q1
92#define v4q q4
93#define v6q q6
94
89/* 95/*
90 * Update message schedule for m0 (W0:W1), using m1 (W2:W3), m4 (W8:W9), 96 * Update message schedule for m0 (W0:W1), using m1 (W2:W3), m4 (W8:W9),
91 * m5 (W10:W11) and m7 (W14:W15). The sha512su0 instruction computes the sigma0 97 * m5 (W10:W11) and m7 (W14:W15). The sha512su0 instruction computes the sigma0
@@ -141,9 +147,12 @@
141 add h4.2d, h4.2d, h3.2d; /* W1:W0 += g:h */ \ 147 add h4.2d, h4.2d, h3.2d; /* W1:W0 += g:h */ \
142 ext tmp0.16b, h2.16b, h3.16b, #8; /* f:g */ \ 148 ext tmp0.16b, h2.16b, h3.16b, #8; /* f:g */ \
143 ext tmp1.16b, h1.16b, h2.16b, #8; /* d:e */ \ 149 ext tmp1.16b, h1.16b, h2.16b, #8; /* d:e */ \
144 sha512h h4, tmp0, tmp1.2d; /* T1 */ \ 150 sha512h h4##q, tmp0##q, tmp1.2d; /* T1 */ \
145 add h5.2d, h1.2d, h4.2d; /* c:d + T1 */ \ 151 add h5.2d, h1.2d, h4.2d; /* c:d + T1 */ \
146 sha512h2 h4, h1, h0.2d; /* T1 + T2 */ 152 sha512h2 h4##q, h1##q, h0.2d; /* T1 + T2 */
153
154#define sha512_round_initial(h0, h1, h2, h3, h4, h5, w, k) \
155 sha512_round(h0, h1, h2, h3, h4, h5, w, k)
147 156
148#define sha512_round_update(h0, h1, h2, h3, h4, h5, m0, m1, m2, m3, m4, k) \ 157#define sha512_round_update(h0, h1, h2, h3, h4, h5, m0, m1, m2, m3, m4, k) \
149 sha512_message_schedule_update(m0, m1, m2, m3, m4) \ 158 sha512_message_schedule_update(m0, m1, m2, m3, m4) \
@@ -181,10 +190,10 @@ sha512_block_ce:
181 mov k512, k512_base 190 mov k512, k512_base
182 191
183 /* Copy current hash state. */ 192 /* Copy current hash state. */
184 mov hs0.2d, hc0.2d 193 mov hs0.16b, hc0.16b
185 mov hs1.2d, hc1.2d 194 mov hs1.16b, hc1.16b
186 mov hs2.2d, hc2.2d 195 mov hs2.16b, hc2.16b
187 mov hs3.2d, hc3.2d 196 mov hs3.16b, hc3.16b
188 197
189 /* Load and byte swap message schedule. */ 198 /* Load and byte swap message schedule. */
190 ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64 199 ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64
@@ -203,14 +212,14 @@ sha512_block_ce:
203 ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64 212 ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64
204 ld1 {k4.2d, k5.2d, k6.2d, k7.2d}, [k512], #64 213 ld1 {k4.2d, k5.2d, k6.2d, k7.2d}, [k512], #64
205 214
206 sha512_round(hs0, hs1, hs2, hs3, hs4, hs5, w0, k0) 215 sha512_round_initial(hs0, hs1, hs2, hs3, hs4, hs5, w0, k0)
207 sha512_round(hs4, hs0, hs5, hs2, hs6, hs7, w1, k1) 216 sha512_round_initial(hs4, hs0, hs5, hs2, hs6, hs7, w1, k1)
208 sha512_round(hs6, hs4, hs7, hs5, hs1, hs3, w2, k2) 217 sha512_round_initial(hs6, hs4, hs7, hs5, hs1, hs3, w2, k2)
209 sha512_round(hs1, hs6, hs3, hs7, hs0, hs2, w3, k3) 218 sha512_round_initial(hs1, hs6, hs3, hs7, hs0, hs2, w3, k3)
210 sha512_round(hs0, hs1, hs2, hs3, hs4, hs5, w4, k4) 219 sha512_round_initial(hs0, hs1, hs2, hs3, hs4, hs5, w4, k4)
211 sha512_round(hs4, hs0, hs5, hs2, hs6, hs7, w5, k5) 220 sha512_round_initial(hs4, hs0, hs5, hs2, hs6, hs7, w5, k5)
212 sha512_round(hs6, hs4, hs7, hs5, hs1, hs3, w6, k6) 221 sha512_round_initial(hs6, hs4, hs7, hs5, hs1, hs3, w6, k6)
213 sha512_round(hs1, hs6, hs3, hs7, hs0, hs2, w7, k7) 222 sha512_round_initial(hs1, hs6, hs3, hs7, hs0, hs2, w7, k7)
214 223
215 /* Rounds 16 through 31 (two rounds at a time). */ 224 /* Rounds 16 through 31 (two rounds at a time). */
216 ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64 225 ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64