summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/lib/libcrypto/sha/sha1_aarch64_ce.S12
-rw-r--r--src/lib/libcrypto/sha/sha1_amd64_generic.S150
-rw-r--r--src/lib/libcrypto/sha/sha1_amd64_shani.S20
-rw-r--r--src/lib/libcrypto/sha/sha256_aarch64_ce.S8
-rw-r--r--src/lib/libcrypto/sha/sha256_amd64_generic.S12
-rw-r--r--src/lib/libcrypto/sha/sha256_amd64_shani.S12
-rw-r--r--src/lib/libcrypto/sha/sha512_amd64_generic.S12
7 files changed, 113 insertions, 113 deletions
diff --git a/src/lib/libcrypto/sha/sha1_aarch64_ce.S b/src/lib/libcrypto/sha/sha1_aarch64_ce.S
index ce7eb81115..853d467641 100644
--- a/src/lib/libcrypto/sha/sha1_aarch64_ce.S
+++ b/src/lib/libcrypto/sha/sha1_aarch64_ce.S
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha1_aarch64_ce.S,v 1.3 2026/01/17 06:31:45 jsing Exp $ */ 1/* $OpenBSD: sha1_aarch64_ce.S,v 1.4 2026/01/24 14:20:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -65,7 +65,7 @@
65 */ 65 */
66#define sha1_message_schedule_update(m0, m1, m2, m3) \ 66#define sha1_message_schedule_update(m0, m1, m2, m3) \
67 sha1su0 m0.4s, m1.4s, m2.4s; \ 67 sha1su0 m0.4s, m1.4s, m2.4s; \
68 sha1su1 m0.4s, m3.4s; 68 sha1su1 m0.4s, m3.4s
69 69
70/* 70/*
71 * Compute four SHA-1 rounds by adding W0:W1:W2:W3 + K0:K1:K2:K3, then 71 * Compute four SHA-1 rounds by adding W0:W1:W2:W3 + K0:K1:K2:K3, then
@@ -77,25 +77,25 @@
77 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ 77 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \
78 mov tmp1, h0.s[0]; \ 78 mov tmp1, h0.s[0]; \
79 sha1c h0, h1, tmp0.4s; \ 79 sha1c h0, h1, tmp0.4s; \
80 sha1h h1, tmp1; 80 sha1h h1, tmp1
81 81
82#define sha1_round2(h0, h1, w, k) \ 82#define sha1_round2(h0, h1, w, k) \
83 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ 83 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \
84 mov tmp1, h0.s[0]; \ 84 mov tmp1, h0.s[0]; \
85 sha1p h0, h1, tmp0.4s; \ 85 sha1p h0, h1, tmp0.4s; \
86 sha1h h1, tmp1; 86 sha1h h1, tmp1
87 87
88#define sha1_round3(h0, h1, w, k) \ 88#define sha1_round3(h0, h1, w, k) \
89 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ 89 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \
90 mov tmp1, h0.s[0]; \ 90 mov tmp1, h0.s[0]; \
91 sha1m h0, h1, tmp0.4s; \ 91 sha1m h0, h1, tmp0.4s; \
92 sha1h h1, tmp1; 92 sha1h h1, tmp1
93 93
94#define sha1_round4(h0, h1, w, k) \ 94#define sha1_round4(h0, h1, w, k) \
95 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ 95 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \
96 mov tmp1, h0.s[0]; \ 96 mov tmp1, h0.s[0]; \
97 sha1p h0, h1, tmp0.4s; \ 97 sha1p h0, h1, tmp0.4s; \
98 sha1h h1, tmp1; 98 sha1h h1, tmp1
99 99
100.arch armv8-a+sha2 100.arch armv8-a+sha2
101 101
diff --git a/src/lib/libcrypto/sha/sha1_amd64_generic.S b/src/lib/libcrypto/sha/sha1_amd64_generic.S
index 685d71edf8..a2088229ec 100644
--- a/src/lib/libcrypto/sha/sha1_amd64_generic.S
+++ b/src/lib/libcrypto/sha/sha1_amd64_generic.S
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha1_amd64_generic.S,v 1.3 2026/01/17 06:31:45 jsing Exp $ */ 1/* $OpenBSD: sha1_amd64_generic.S,v 1.4 2026/01/24 14:20:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -46,7 +46,7 @@
46#define sha1_message_schedule_load(idx, m, w, wt) \ 46#define sha1_message_schedule_load(idx, m, w, wt) \
47 movl ((idx&0xf)*4)(m), wt; \ 47 movl ((idx&0xf)*4)(m), wt; \
48 bswapl wt; \ 48 bswapl wt; \
49 movl wt, ((idx&0xf)*4)(w); 49 movl wt, ((idx&0xf)*4)(w)
50 50
51/* 51/*
52 * Update message schedule and return current value in wt: 52 * Update message schedule and return current value in wt:
@@ -60,7 +60,7 @@
60 xorl (((idx)&0xf)*4)(w), wt; /* W0 */ \ 60 xorl (((idx)&0xf)*4)(w), wt; /* W0 */ \
61 roll $1, wt; \ 61 roll $1, wt; \
62 \ 62 \
63 movl wt, ((idx&0xf)*4)(w); 63 movl wt, ((idx&0xf)*4)(w)
64 64
65/* 65/*
66 * Compute a SHA-1 round without logic function: 66 * Compute a SHA-1 round without logic function:
@@ -97,7 +97,7 @@
97 xorl d, tmp2; /* Ch */ \ 97 xorl d, tmp2; /* Ch */ \
98 addl tmp2, e; /* Ch */ \ 98 addl tmp2, e; /* Ch */ \
99 \ 99 \
100 sha1_round(a, b, c, d, e, kt, wt); 100 sha1_round(a, b, c, d, e, kt, wt)
101 101
102/* 102/*
103 * Compute a SHA-1 round with Parity: 103 * Compute a SHA-1 round with Parity:
@@ -114,7 +114,7 @@
114 xorl d, tmp2; /* Parity */ \ 114 xorl d, tmp2; /* Parity */ \
115 addl tmp2, e; /* Parity */ \ 115 addl tmp2, e; /* Parity */ \
116 \ 116 \
117 sha1_round(a, b, c, d, e, kt, wt); 117 sha1_round(a, b, c, d, e, kt, wt)
118 118
119/* 119/*
120 * Compute a SHA-1 round with Maj: 120 * Compute a SHA-1 round with Maj:
@@ -134,26 +134,26 @@
134 xorl tmp2, tmp3; /* Maj */ \ 134 xorl tmp2, tmp3; /* Maj */ \
135 addl tmp3, e; /* Maj */ \ 135 addl tmp3, e; /* Maj */ \
136 \ 136 \
137 sha1_round(a, b, c, d, e, kt, wt); 137 sha1_round(a, b, c, d, e, kt, wt)
138 138
139#define sha1_round1_load(idx, a, b, c, d, e) \ 139#define sha1_round1_load(idx, a, b, c, d, e) \
140 sha1_message_schedule_load(idx, in, %rsp, tmp0) \ 140 sha1_message_schedule_load(idx, in, %rsp, tmp0); \
141 sha1_round_ch(a, b, c, d, e, 0x5a827999, tmp0) 141 sha1_round_ch(a, b, c, d, e, 0x5a827999, tmp0)
142 142
143#define sha1_round1_update(idx, a, b, c, d, e) \ 143#define sha1_round1_update(idx, a, b, c, d, e) \
144 sha1_message_schedule_update(idx, %rsp, tmp0) \ 144 sha1_message_schedule_update(idx, %rsp, tmp0); \
145 sha1_round_ch(a, b, c, d, e, 0x5a827999, tmp0) 145 sha1_round_ch(a, b, c, d, e, 0x5a827999, tmp0)
146 146
147#define sha1_round2_update(idx, a, b, c, d, e) \ 147#define sha1_round2_update(idx, a, b, c, d, e) \
148 sha1_message_schedule_update(idx, %rsp, tmp0) \ 148 sha1_message_schedule_update(idx, %rsp, tmp0); \
149 sha1_round_parity(a, b, c, d, e, 0x6ed9eba1, tmp0) 149 sha1_round_parity(a, b, c, d, e, 0x6ed9eba1, tmp0)
150 150
151#define sha1_round3_update(idx, a, b, c, d, e) \ 151#define sha1_round3_update(idx, a, b, c, d, e) \
152 sha1_message_schedule_update(idx, %rsp, tmp0) \ 152 sha1_message_schedule_update(idx, %rsp, tmp0); \
153 sha1_round_maj(a, b, c, d, e, 0x8f1bbcdc, tmp0) 153 sha1_round_maj(a, b, c, d, e, 0x8f1bbcdc, tmp0)
154 154
155#define sha1_round4_update(idx, a, b, c, d, e) \ 155#define sha1_round4_update(idx, a, b, c, d, e) \
156 sha1_message_schedule_update(idx, %rsp, tmp0) \ 156 sha1_message_schedule_update(idx, %rsp, tmp0); \
157 sha1_round_parity(a, b, c, d, e, 0xca62c1d6, tmp0) 157 sha1_round_parity(a, b, c, d, e, 0xca62c1d6, tmp0)
158 158
159.section .text 159.section .text
@@ -215,76 +215,76 @@ sha1_block_generic:
215 sha1_round1_load(15, hs0, hs1, hs2, hs3, hs4) 215 sha1_round1_load(15, hs0, hs1, hs2, hs3, hs4)
216 216
217 /* Round 16 through 31. */ 217 /* Round 16 through 31. */
218 sha1_round1_update(16, hs4, hs0, hs1, hs2, hs3); 218 sha1_round1_update(16, hs4, hs0, hs1, hs2, hs3)
219 sha1_round1_update(17, hs3, hs4, hs0, hs1, hs2); 219 sha1_round1_update(17, hs3, hs4, hs0, hs1, hs2)
220 sha1_round1_update(18, hs2, hs3, hs4, hs0, hs1); 220 sha1_round1_update(18, hs2, hs3, hs4, hs0, hs1)
221 sha1_round1_update(19, hs1, hs2, hs3, hs4, hs0); 221 sha1_round1_update(19, hs1, hs2, hs3, hs4, hs0)
222 sha1_round2_update(20, hs0, hs1, hs2, hs3, hs4); 222 sha1_round2_update(20, hs0, hs1, hs2, hs3, hs4)
223 sha1_round2_update(21, hs4, hs0, hs1, hs2, hs3); 223 sha1_round2_update(21, hs4, hs0, hs1, hs2, hs3)
224 sha1_round2_update(22, hs3, hs4, hs0, hs1, hs2); 224 sha1_round2_update(22, hs3, hs4, hs0, hs1, hs2)
225 sha1_round2_update(23, hs2, hs3, hs4, hs0, hs1); 225 sha1_round2_update(23, hs2, hs3, hs4, hs0, hs1)
226 sha1_round2_update(24, hs1, hs2, hs3, hs4, hs0); 226 sha1_round2_update(24, hs1, hs2, hs3, hs4, hs0)
227 sha1_round2_update(25, hs0, hs1, hs2, hs3, hs4); 227 sha1_round2_update(25, hs0, hs1, hs2, hs3, hs4)
228 sha1_round2_update(26, hs4, hs0, hs1, hs2, hs3); 228 sha1_round2_update(26, hs4, hs0, hs1, hs2, hs3)
229 sha1_round2_update(27, hs3, hs4, hs0, hs1, hs2); 229 sha1_round2_update(27, hs3, hs4, hs0, hs1, hs2)
230 sha1_round2_update(28, hs2, hs3, hs4, hs0, hs1); 230 sha1_round2_update(28, hs2, hs3, hs4, hs0, hs1)
231 sha1_round2_update(29, hs1, hs2, hs3, hs4, hs0); 231 sha1_round2_update(29, hs1, hs2, hs3, hs4, hs0)
232 sha1_round2_update(30, hs0, hs1, hs2, hs3, hs4); 232 sha1_round2_update(30, hs0, hs1, hs2, hs3, hs4)
233 sha1_round2_update(31, hs4, hs0, hs1, hs2, hs3); 233 sha1_round2_update(31, hs4, hs0, hs1, hs2, hs3)
234 234
235 /* Round 32 through 47. */ 235 /* Round 32 through 47. */
236 sha1_round2_update(32, hs3, hs4, hs0, hs1, hs2); 236 sha1_round2_update(32, hs3, hs4, hs0, hs1, hs2)
237 sha1_round2_update(33, hs2, hs3, hs4, hs0, hs1); 237 sha1_round2_update(33, hs2, hs3, hs4, hs0, hs1)
238 sha1_round2_update(34, hs1, hs2, hs3, hs4, hs0); 238 sha1_round2_update(34, hs1, hs2, hs3, hs4, hs0)
239 sha1_round2_update(35, hs0, hs1, hs2, hs3, hs4); 239 sha1_round2_update(35, hs0, hs1, hs2, hs3, hs4)
240 sha1_round2_update(36, hs4, hs0, hs1, hs2, hs3); 240 sha1_round2_update(36, hs4, hs0, hs1, hs2, hs3)
241 sha1_round2_update(37, hs3, hs4, hs0, hs1, hs2); 241 sha1_round2_update(37, hs3, hs4, hs0, hs1, hs2)
242 sha1_round2_update(38, hs2, hs3, hs4, hs0, hs1); 242 sha1_round2_update(38, hs2, hs3, hs4, hs0, hs1)
243 sha1_round2_update(39, hs1, hs2, hs3, hs4, hs0); 243 sha1_round2_update(39, hs1, hs2, hs3, hs4, hs0)
244 sha1_round3_update(40, hs0, hs1, hs2, hs3, hs4); 244 sha1_round3_update(40, hs0, hs1, hs2, hs3, hs4)
245 sha1_round3_update(41, hs4, hs0, hs1, hs2, hs3); 245 sha1_round3_update(41, hs4, hs0, hs1, hs2, hs3)
246 sha1_round3_update(42, hs3, hs4, hs0, hs1, hs2); 246 sha1_round3_update(42, hs3, hs4, hs0, hs1, hs2)
247 sha1_round3_update(43, hs2, hs3, hs4, hs0, hs1); 247 sha1_round3_update(43, hs2, hs3, hs4, hs0, hs1)
248 sha1_round3_update(44, hs1, hs2, hs3, hs4, hs0); 248 sha1_round3_update(44, hs1, hs2, hs3, hs4, hs0)
249 sha1_round3_update(45, hs0, hs1, hs2, hs3, hs4); 249 sha1_round3_update(45, hs0, hs1, hs2, hs3, hs4)
250 sha1_round3_update(46, hs4, hs0, hs1, hs2, hs3); 250 sha1_round3_update(46, hs4, hs0, hs1, hs2, hs3)
251 sha1_round3_update(47, hs3, hs4, hs0, hs1, hs2); 251 sha1_round3_update(47, hs3, hs4, hs0, hs1, hs2)
252 252
253 /* Round 48 through 63. */ 253 /* Round 48 through 63. */
254 sha1_round3_update(48, hs2, hs3, hs4, hs0, hs1); 254 sha1_round3_update(48, hs2, hs3, hs4, hs0, hs1)
255 sha1_round3_update(49, hs1, hs2, hs3, hs4, hs0); 255 sha1_round3_update(49, hs1, hs2, hs3, hs4, hs0)
256 sha1_round3_update(50, hs0, hs1, hs2, hs3, hs4); 256 sha1_round3_update(50, hs0, hs1, hs2, hs3, hs4)
257 sha1_round3_update(51, hs4, hs0, hs1, hs2, hs3); 257 sha1_round3_update(51, hs4, hs0, hs1, hs2, hs3)
258 sha1_round3_update(52, hs3, hs4, hs0, hs1, hs2); 258 sha1_round3_update(52, hs3, hs4, hs0, hs1, hs2)
259 sha1_round3_update(53, hs2, hs3, hs4, hs0, hs1); 259 sha1_round3_update(53, hs2, hs3, hs4, hs0, hs1)
260 sha1_round3_update(54, hs1, hs2, hs3, hs4, hs0); 260 sha1_round3_update(54, hs1, hs2, hs3, hs4, hs0)
261 sha1_round3_update(55, hs0, hs1, hs2, hs3, hs4); 261 sha1_round3_update(55, hs0, hs1, hs2, hs3, hs4)
262 sha1_round3_update(56, hs4, hs0, hs1, hs2, hs3); 262 sha1_round3_update(56, hs4, hs0, hs1, hs2, hs3)
263 sha1_round3_update(57, hs3, hs4, hs0, hs1, hs2); 263 sha1_round3_update(57, hs3, hs4, hs0, hs1, hs2)
264 sha1_round3_update(58, hs2, hs3, hs4, hs0, hs1); 264 sha1_round3_update(58, hs2, hs3, hs4, hs0, hs1)
265 sha1_round3_update(59, hs1, hs2, hs3, hs4, hs0); 265 sha1_round3_update(59, hs1, hs2, hs3, hs4, hs0)
266 sha1_round4_update(60, hs0, hs1, hs2, hs3, hs4); 266 sha1_round4_update(60, hs0, hs1, hs2, hs3, hs4)
267 sha1_round4_update(61, hs4, hs0, hs1, hs2, hs3); 267 sha1_round4_update(61, hs4, hs0, hs1, hs2, hs3)
268 sha1_round4_update(62, hs3, hs4, hs0, hs1, hs2); 268 sha1_round4_update(62, hs3, hs4, hs0, hs1, hs2)
269 sha1_round4_update(63, hs2, hs3, hs4, hs0, hs1); 269 sha1_round4_update(63, hs2, hs3, hs4, hs0, hs1)
270 270
271 /* Round 64 through 79. */ 271 /* Round 64 through 79. */
272 sha1_round4_update(64, hs1, hs2, hs3, hs4, hs0); 272 sha1_round4_update(64, hs1, hs2, hs3, hs4, hs0)
273 sha1_round4_update(65, hs0, hs1, hs2, hs3, hs4); 273 sha1_round4_update(65, hs0, hs1, hs2, hs3, hs4)
274 sha1_round4_update(66, hs4, hs0, hs1, hs2, hs3); 274 sha1_round4_update(66, hs4, hs0, hs1, hs2, hs3)
275 sha1_round4_update(67, hs3, hs4, hs0, hs1, hs2); 275 sha1_round4_update(67, hs3, hs4, hs0, hs1, hs2)
276 sha1_round4_update(68, hs2, hs3, hs4, hs0, hs1); 276 sha1_round4_update(68, hs2, hs3, hs4, hs0, hs1)
277 sha1_round4_update(69, hs1, hs2, hs3, hs4, hs0); 277 sha1_round4_update(69, hs1, hs2, hs3, hs4, hs0)
278 sha1_round4_update(70, hs0, hs1, hs2, hs3, hs4); 278 sha1_round4_update(70, hs0, hs1, hs2, hs3, hs4)
279 sha1_round4_update(71, hs4, hs0, hs1, hs2, hs3); 279 sha1_round4_update(71, hs4, hs0, hs1, hs2, hs3)
280 sha1_round4_update(72, hs3, hs4, hs0, hs1, hs2); 280 sha1_round4_update(72, hs3, hs4, hs0, hs1, hs2)
281 sha1_round4_update(73, hs2, hs3, hs4, hs0, hs1); 281 sha1_round4_update(73, hs2, hs3, hs4, hs0, hs1)
282 sha1_round4_update(74, hs1, hs2, hs3, hs4, hs0); 282 sha1_round4_update(74, hs1, hs2, hs3, hs4, hs0)
283 sha1_round4_update(75, hs0, hs1, hs2, hs3, hs4); 283 sha1_round4_update(75, hs0, hs1, hs2, hs3, hs4)
284 sha1_round4_update(76, hs4, hs0, hs1, hs2, hs3); 284 sha1_round4_update(76, hs4, hs0, hs1, hs2, hs3)
285 sha1_round4_update(77, hs3, hs4, hs0, hs1, hs2); 285 sha1_round4_update(77, hs3, hs4, hs0, hs1, hs2)
286 sha1_round4_update(78, hs2, hs3, hs4, hs0, hs1); 286 sha1_round4_update(78, hs2, hs3, hs4, hs0, hs1)
287 sha1_round4_update(79, hs1, hs2, hs3, hs4, hs0); 287 sha1_round4_update(79, hs1, hs2, hs3, hs4, hs0)
288 288
289 /* Add intermediate state to hash state. */ 289 /* Add intermediate state to hash state. */
290 addl (0*4)(ctx), hs0 290 addl (0*4)(ctx), hs0
diff --git a/src/lib/libcrypto/sha/sha1_amd64_shani.S b/src/lib/libcrypto/sha/sha1_amd64_shani.S
index 751554f1d5..e31143887a 100644
--- a/src/lib/libcrypto/sha/sha1_amd64_shani.S
+++ b/src/lib/libcrypto/sha/sha1_amd64_shani.S
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha1_amd64_shani.S,v 1.2 2026/01/17 06:31:45 jsing Exp $ */ 1/* $OpenBSD: sha1_amd64_shani.S,v 1.3 2026/01/24 14:20:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -50,25 +50,25 @@
50 50
51#define sha1_message_schedule_load(idx, m, xmsg) \ 51#define sha1_message_schedule_load(idx, m, xmsg) \
52 movdqu (idx*16)(m), xmsg; \ 52 movdqu (idx*16)(m), xmsg; \
53 pshufb xshufmask, xmsg; 53 pshufb xshufmask, xmsg
54 54
55#define sha1_message_schedule_update(xm0, xm1, xm2, xm3) \ 55#define sha1_message_schedule_update(xm0, xm1, xm2, xm3) \
56 sha1msg1 xm1, xm0; \ 56 sha1msg1 xm1, xm0; \
57 pxor xm2, xm0; \ 57 pxor xm2, xm0; \
58 sha1msg2 xm3, xm0; 58 sha1msg2 xm3, xm0
59 59
60#define sha1_shani_round(fn, xmsg, xe, xe_next) \ 60#define sha1_shani_round(fn, xmsg, xe, xe_next) \
61 sha1nexte xmsg, xe; \ 61 sha1nexte xmsg, xe; \
62 movdqa xabcd, xe_next; \ 62 movdqa xabcd, xe_next; \
63 sha1rnds4 fn, xe, xabcd; 63 sha1rnds4 fn, xe, xabcd
64 64
65#define sha1_shani_round_load(fn, idx, m, xmsg, xe, xe_next) \ 65#define sha1_shani_round_load(fn, idx, m, xmsg, xe, xe_next) \
66 sha1_message_schedule_load(idx, m, xmsg); \ 66 sha1_message_schedule_load(idx, m, xmsg); \
67 sha1_shani_round(fn, xmsg, xe, xe_next); 67 sha1_shani_round(fn, xmsg, xe, xe_next)
68 68
69#define sha1_shani_round_update(fn, xm0, xm1, xm2, xm3, xe, xe_next) \ 69#define sha1_shani_round_update(fn, xm0, xm1, xm2, xm3, xe, xe_next) \
70 sha1_message_schedule_update(xm0, xm1, xm2, xm3); \ 70 sha1_message_schedule_update(xm0, xm1, xm2, xm3); \
71 sha1_shani_round(fn, xm0, xe, xe_next); 71 sha1_shani_round(fn, xm0, xe, xe_next)
72 72
73 73
74.section .text 74.section .text
@@ -109,14 +109,14 @@ sha1_block_shani:
109 movdqa xe0, xe_save 109 movdqa xe0, xe_save
110 110
111 /* Rounds 0 through 15 (four rounds at a time). */ 111 /* Rounds 0 through 15 (four rounds at a time). */
112 sha1_message_schedule_load(0, in, xmsg0); 112 sha1_message_schedule_load(0, in, xmsg0)
113 paddd xmsg0, xe0 113 paddd xmsg0, xe0
114 movdqa xabcd, xe1 114 movdqa xabcd, xe1
115 sha1rnds4 $0, xe0, xabcd 115 sha1rnds4 $0, xe0, xabcd
116 116
117 sha1_shani_round_load($0, 1, in, xmsg1, xe1, xe0); 117 sha1_shani_round_load($0, 1, in, xmsg1, xe1, xe0)
118 sha1_shani_round_load($0, 2, in, xmsg2, xe0, xe1); 118 sha1_shani_round_load($0, 2, in, xmsg2, xe0, xe1)
119 sha1_shani_round_load($0, 3, in, xmsg3, xe1, xe0); 119 sha1_shani_round_load($0, 3, in, xmsg3, xe1, xe0)
120 120
121 /* Rounds 16 through 79 (four rounds at a time). */ 121 /* Rounds 16 through 79 (four rounds at a time). */
122 sha1_shani_round_update($0, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1) 122 sha1_shani_round_update($0, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
diff --git a/src/lib/libcrypto/sha/sha256_aarch64_ce.S b/src/lib/libcrypto/sha/sha256_aarch64_ce.S
index b66969427b..343f338390 100644
--- a/src/lib/libcrypto/sha/sha256_aarch64_ce.S
+++ b/src/lib/libcrypto/sha/sha256_aarch64_ce.S
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha256_aarch64_ce.S,v 1.4 2026/01/17 06:31:45 jsing Exp $ */ 1/* $OpenBSD: sha256_aarch64_ce.S,v 1.5 2026/01/24 14:20:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -65,7 +65,7 @@
65 */ 65 */
66#define sha256_message_schedule_update(m0, m1, m2, m3) \ 66#define sha256_message_schedule_update(m0, m1, m2, m3) \
67 sha256su0 m0.4s, m1.4s; \ 67 sha256su0 m0.4s, m1.4s; \
68 sha256su1 m0.4s, m2.4s, m3.4s; 68 sha256su1 m0.4s, m2.4s, m3.4s
69 69
70/* 70/*
71 * Compute four SHA-256 rounds by adding W0:W1:W2:W3 + K0:K1:K2:K3, then 71 * Compute four SHA-256 rounds by adding W0:W1:W2:W3 + K0:K1:K2:K3, then
@@ -76,10 +76,10 @@
76 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ 76 add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \
77 mov tmp1.4s, h0.4s; \ 77 mov tmp1.4s, h0.4s; \
78 sha256h h0, h1, tmp0.4s; \ 78 sha256h h0, h1, tmp0.4s; \
79 sha256h2 h1, tmp1, tmp0.4s; 79 sha256h2 h1, tmp1, tmp0.4s
80 80
81#define sha256_round_update(h0, h1, m0, m1, m2, m3, k) \ 81#define sha256_round_update(h0, h1, m0, m1, m2, m3, k) \
82 sha256_message_schedule_update(m0, m1, m2, m3) \ 82 sha256_message_schedule_update(m0, m1, m2, m3); \
83 sha256_round(h0, h1, m0, k) 83 sha256_round(h0, h1, m0, k)
84 84
85.arch armv8-a+sha2 85.arch armv8-a+sha2
diff --git a/src/lib/libcrypto/sha/sha256_amd64_generic.S b/src/lib/libcrypto/sha/sha256_amd64_generic.S
index a5bb3eca42..a860b014f2 100644
--- a/src/lib/libcrypto/sha/sha256_amd64_generic.S
+++ b/src/lib/libcrypto/sha/sha256_amd64_generic.S
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha256_amd64_generic.S,v 1.4 2026/01/17 06:31:45 jsing Exp $ */ 1/* $OpenBSD: sha256_amd64_generic.S,v 1.5 2026/01/24 14:20:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -51,7 +51,7 @@
51#define sha256_message_schedule_load(idx, m, w, wt) \ 51#define sha256_message_schedule_load(idx, m, w, wt) \
52 movl (m, round, 4), wt; \ 52 movl (m, round, 4), wt; \
53 bswapl wt; \ 53 bswapl wt; \
54 movl wt, ((idx&0xf)*4)(w); 54 movl wt, ((idx&0xf)*4)(w)
55 55
56/* 56/*
57 * Update message schedule and return current value in wt: 57 * Update message schedule and return current value in wt:
@@ -82,7 +82,7 @@
82 xorl tmp3, tmp2; /* sigma0 */ \ 82 xorl tmp3, tmp2; /* sigma0 */ \
83 addl tmp2, wt; /* sigma0 */ \ 83 addl tmp2, wt; /* sigma0 */ \
84 \ 84 \
85 movl wt, ((idx&0xf)*4)(w); 85 movl wt, ((idx&0xf)*4)(w)
86 86
87/* 87/*
88 * Compute a SHA-256 round: 88 * Compute a SHA-256 round:
@@ -133,14 +133,14 @@
133 xorl tmp2, tmp3; /* T2 Maj */ \ 133 xorl tmp2, tmp3; /* T2 Maj */ \
134 addl tmp3, h; /* T2 Maj */ \ 134 addl tmp3, h; /* T2 Maj */ \
135 \ 135 \
136 addq $1, round; 136 addq $1, round
137 137
138#define sha256_round_load(idx, a, b, c, d, e, f, g, h) \ 138#define sha256_round_load(idx, a, b, c, d, e, f, g, h) \
139 sha256_message_schedule_load(idx, in, %rsp, tmp0) \ 139 sha256_message_schedule_load(idx, in, %rsp, tmp0); \
140 sha256_round(idx, a, b, c, d, e, f, g, h, k256, %rsp, tmp0) 140 sha256_round(idx, a, b, c, d, e, f, g, h, k256, %rsp, tmp0)
141 141
142#define sha256_round_update(idx, a, b, c, d, e, f, g, h) \ 142#define sha256_round_update(idx, a, b, c, d, e, f, g, h) \
143 sha256_message_schedule_update(idx, %rsp, tmp0) \ 143 sha256_message_schedule_update(idx, %rsp, tmp0); \
144 sha256_round(idx, a, b, c, d, e, f, g, h, k256, %rsp, tmp0) 144 sha256_round(idx, a, b, c, d, e, f, g, h, k256, %rsp, tmp0)
145 145
146.section .text 146.section .text
diff --git a/src/lib/libcrypto/sha/sha256_amd64_shani.S b/src/lib/libcrypto/sha/sha256_amd64_shani.S
index e43ecfa51f..b6f32b72cf 100644
--- a/src/lib/libcrypto/sha/sha256_amd64_shani.S
+++ b/src/lib/libcrypto/sha/sha256_amd64_shani.S
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha256_amd64_shani.S,v 1.2 2026/01/17 06:31:45 jsing Exp $ */ 1/* $OpenBSD: sha256_amd64_shani.S,v 1.3 2026/01/24 14:20:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -56,29 +56,29 @@
56#define sha256_message_schedule_load(idx, m, xmsgtmp) \ 56#define sha256_message_schedule_load(idx, m, xmsgtmp) \
57 movdqu (idx*16)(m), xmsg; \ 57 movdqu (idx*16)(m), xmsg; \
58 pshufb xshufmask, xmsg; \ 58 pshufb xshufmask, xmsg; \
59 movdqa xmsg, xmsgtmp; 59 movdqa xmsg, xmsgtmp
60 60
61#define sha256_message_schedule_update(xmt0, xmt1, xmt2, xmt3) \ 61#define sha256_message_schedule_update(xmt0, xmt1, xmt2, xmt3) \
62 sha256msg1 xmt1, xmt0; \ 62 sha256msg1 xmt1, xmt0; \
63 movdqa xmt3, xmsgtmp4; \ 63 movdqa xmt3, xmsgtmp4; \
64 palignr $4, xmt2, xmsgtmp4; \ 64 palignr $4, xmt2, xmsgtmp4; \
65 paddd xmsgtmp4, xmt0; \ 65 paddd xmsgtmp4, xmt0; \
66 sha256msg2 xmt3, xmt0; 66 sha256msg2 xmt3, xmt0
67 67
68#define sha256_shani_round(idx) \ 68#define sha256_shani_round(idx) \
69 paddd (idx*16)(k256), xmsg; \ 69 paddd (idx*16)(k256), xmsg; \
70 sha256rnds2 xmsg, xhs0, xhs1; \ 70 sha256rnds2 xmsg, xhs0, xhs1; \
71 pshufd $0x0e, xmsg, xmsg; \ 71 pshufd $0x0e, xmsg, xmsg; \
72 sha256rnds2 xmsg, xhs1, xhs0; 72 sha256rnds2 xmsg, xhs1, xhs0
73 73
74#define sha256_shani_round_load(idx, m, xmsgtmp) \ 74#define sha256_shani_round_load(idx, m, xmsgtmp) \
75 sha256_message_schedule_load(idx, m, xmsgtmp); \ 75 sha256_message_schedule_load(idx, m, xmsgtmp); \
76 sha256_shani_round(idx); 76 sha256_shani_round(idx)
77 77
78#define sha256_shani_round_update(idx, xmt0, xmt1, xmt2, xmt3) \ 78#define sha256_shani_round_update(idx, xmt0, xmt1, xmt2, xmt3) \
79 sha256_message_schedule_update(xmt0, xmt1, xmt2, xmt3); \ 79 sha256_message_schedule_update(xmt0, xmt1, xmt2, xmt3); \
80 movdqa xmt0, xmsg; \ 80 movdqa xmt0, xmsg; \
81 sha256_shani_round(idx); 81 sha256_shani_round(idx)
82 82
83.section .text 83.section .text
84 84
diff --git a/src/lib/libcrypto/sha/sha512_amd64_generic.S b/src/lib/libcrypto/sha/sha512_amd64_generic.S
index 3b6a9719e1..052367df07 100644
--- a/src/lib/libcrypto/sha/sha512_amd64_generic.S
+++ b/src/lib/libcrypto/sha/sha512_amd64_generic.S
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha512_amd64_generic.S,v 1.2 2026/01/17 06:31:45 jsing Exp $ */ 1/* $OpenBSD: sha512_amd64_generic.S,v 1.3 2026/01/24 14:20:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -51,7 +51,7 @@
51#define sha512_message_schedule_load(idx, m, w, wt) \ 51#define sha512_message_schedule_load(idx, m, w, wt) \
52 movq (m, round, 8), wt; \ 52 movq (m, round, 8), wt; \
53 bswapq wt; \ 53 bswapq wt; \
54 movq wt, ((idx&0xf)*8)(w); 54 movq wt, ((idx&0xf)*8)(w)
55 55
56/* 56/*
57 * Update message schedule and return current value in wt: 57 * Update message schedule and return current value in wt:
@@ -83,7 +83,7 @@
83 xorq tmp3, tmp2; /* sigma0 */ \ 83 xorq tmp3, tmp2; /* sigma0 */ \
84 addq tmp2, wt; /* sigma0 */ \ 84 addq tmp2, wt; /* sigma0 */ \
85 \ 85 \
86 movq wt, ((idx&0xf)*8)(w); 86 movq wt, ((idx&0xf)*8)(w)
87 87
88/* 88/*
89 * Compute a SHA-512 round: 89 * Compute a SHA-512 round:
@@ -134,14 +134,14 @@
134 xorq tmp2, tmp3; /* T2 Maj */ \ 134 xorq tmp2, tmp3; /* T2 Maj */ \
135 addq tmp3, h; /* T2 Maj */ \ 135 addq tmp3, h; /* T2 Maj */ \
136 \ 136 \
137 addq $1, round; 137 addq $1, round
138 138
139#define sha512_round_load(idx, a, b, c, d, e, f, g, h) \ 139#define sha512_round_load(idx, a, b, c, d, e, f, g, h) \
140 sha512_message_schedule_load(idx, in, %rsp, tmp0) \ 140 sha512_message_schedule_load(idx, in, %rsp, tmp0); \
141 sha512_round(idx, a, b, c, d, e, f, g, h, k512, %rsp, tmp0) 141 sha512_round(idx, a, b, c, d, e, f, g, h, k512, %rsp, tmp0)
142 142
143#define sha512_round_update(idx, a, b, c, d, e, f, g, h) \ 143#define sha512_round_update(idx, a, b, c, d, e, f, g, h) \
144 sha512_message_schedule_update(idx, %rsp, tmp0) \ 144 sha512_message_schedule_update(idx, %rsp, tmp0); \
145 sha512_round(idx, a, b, c, d, e, f, g, h, k512, %rsp, tmp0) 145 sha512_round(idx, a, b, c, d, e, f, g, h, k512, %rsp, tmp0)
146 146
147.section .text 147.section .text