diff options
| -rw-r--r-- | src/lib/libcrypto/sha/sha1_aarch64_ce.S | 61 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/sha256_aarch64_ce.S | 27 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/sha512_aarch64_ce.S | 39 |
3 files changed, 72 insertions, 55 deletions
diff --git a/src/lib/libcrypto/sha/sha1_aarch64_ce.S b/src/lib/libcrypto/sha/sha1_aarch64_ce.S index 853d467641..641500a1e5 100644 --- a/src/lib/libcrypto/sha/sha1_aarch64_ce.S +++ b/src/lib/libcrypto/sha/sha1_aarch64_ce.S | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: sha1_aarch64_ce.S,v 1.4 2026/01/24 14:20:52 jsing Exp $ */ | 1 | /* $OpenBSD: sha1_aarch64_ce.S,v 1.5 2026/01/25 08:22:17 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -40,6 +40,7 @@ | |||
| 40 | #define hc1s s17 | 40 | #define hc1s s17 |
| 41 | 41 | ||
| 42 | #define hs0 v18 | 42 | #define hs0 v18 |
| 43 | #define hs0q q18 | ||
| 43 | #define hs1 v19 | 44 | #define hs1 v19 |
| 44 | #define hs1s s19 | 45 | #define hs1s s19 |
| 45 | 46 | ||
| @@ -76,26 +77,26 @@ | |||
| 76 | #define sha1_round1(h0, h1, w, k) \ | 77 | #define sha1_round1(h0, h1, w, k) \ |
| 77 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ | 78 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ |
| 78 | mov tmp1, h0.s[0]; \ | 79 | mov tmp1, h0.s[0]; \ |
| 79 | sha1c h0, h1, tmp0.4s; \ | 80 | sha1c h0##q, h1##s, tmp0.4s; \ |
| 80 | sha1h h1, tmp1 | 81 | sha1h h1##s, tmp1 |
| 81 | 82 | ||
| 82 | #define sha1_round2(h0, h1, w, k) \ | 83 | #define sha1_round2(h0, h1, w, k) \ |
| 83 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ | 84 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ |
| 84 | mov tmp1, h0.s[0]; \ | 85 | mov tmp1, h0.s[0]; \ |
| 85 | sha1p h0, h1, tmp0.4s; \ | 86 | sha1p h0##q, h1##s, tmp0.4s; \ |
| 86 | sha1h h1, tmp1 | 87 | sha1h h1##s, tmp1 |
| 87 | 88 | ||
| 88 | #define sha1_round3(h0, h1, w, k) \ | 89 | #define sha1_round3(h0, h1, w, k) \ |
| 89 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ | 90 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ |
| 90 | mov tmp1, h0.s[0]; \ | 91 | mov tmp1, h0.s[0]; \ |
| 91 | sha1m h0, h1, tmp0.4s; \ | 92 | sha1m h0##q, h1##s, tmp0.4s; \ |
| 92 | sha1h h1, tmp1 | 93 | sha1h h1##s, tmp1 |
| 93 | 94 | ||
| 94 | #define sha1_round4(h0, h1, w, k) \ | 95 | #define sha1_round4(h0, h1, w, k) \ |
| 95 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ | 96 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ |
| 96 | mov tmp1, h0.s[0]; \ | 97 | mov tmp1, h0.s[0]; \ |
| 97 | sha1p h0, h1, tmp0.4s; \ | 98 | sha1p h0##q, h1##s, tmp0.4s; \ |
| 98 | sha1h h1, tmp1 | 99 | sha1h h1##s, tmp1 |
| 99 | 100 | ||
| 100 | .arch armv8-a+sha2 | 101 | .arch armv8-a+sha2 |
| 101 | 102 | ||
| @@ -140,7 +141,7 @@ sha1_block_ce: | |||
| 140 | 141 | ||
| 141 | .Lblock_loop: | 142 | .Lblock_loop: |
| 142 | /* Copy current hash state. */ | 143 | /* Copy current hash state. */ |
| 143 | mov hs0.4s, hc0.4s | 144 | mov hs0.16b, hc0.16b |
| 144 | mov hs1s, hc1.s[0] | 145 | mov hs1s, hc1.s[0] |
| 145 | 146 | ||
| 146 | /* Load and byte swap message schedule. */ | 147 | /* Load and byte swap message schedule. */ |
| @@ -151,10 +152,10 @@ sha1_block_ce: | |||
| 151 | rev32 w3.16b, w3.16b | 152 | rev32 w3.16b, w3.16b |
| 152 | 153 | ||
| 153 | /* Rounds 0 through 15 (four rounds at a time). */ | 154 | /* Rounds 0 through 15 (four rounds at a time). */ |
| 154 | sha1_round1(hs0, hs1s, w0, k0) | 155 | sha1_round1(hs0, hs1, w0, k0) |
| 155 | sha1_round1(hs0, hs1s, w1, k0) | 156 | sha1_round1(hs0, hs1, w1, k0) |
| 156 | sha1_round1(hs0, hs1s, w2, k0) | 157 | sha1_round1(hs0, hs1, w2, k0) |
| 157 | sha1_round1(hs0, hs1s, w3, k0) | 158 | sha1_round1(hs0, hs1, w3, k0) |
| 158 | 159 | ||
| 159 | /* Rounds 16 through 31 (four rounds at a time). */ | 160 | /* Rounds 16 through 31 (four rounds at a time). */ |
| 160 | sha1_message_schedule_update(w0, w1, w2, w3) | 161 | sha1_message_schedule_update(w0, w1, w2, w3) |
| @@ -162,10 +163,10 @@ sha1_block_ce: | |||
| 162 | sha1_message_schedule_update(w2, w3, w0, w1) | 163 | sha1_message_schedule_update(w2, w3, w0, w1) |
| 163 | sha1_message_schedule_update(w3, w0, w1, w2) | 164 | sha1_message_schedule_update(w3, w0, w1, w2) |
| 164 | 165 | ||
| 165 | sha1_round1(hs0, hs1s, w0, k0) | 166 | sha1_round1(hs0, hs1, w0, k0) |
| 166 | sha1_round2(hs0, hs1s, w1, k1) | 167 | sha1_round2(hs0, hs1, w1, k1) |
| 167 | sha1_round2(hs0, hs1s, w2, k1) | 168 | sha1_round2(hs0, hs1, w2, k1) |
| 168 | sha1_round2(hs0, hs1s, w3, k1) | 169 | sha1_round2(hs0, hs1, w3, k1) |
| 169 | 170 | ||
| 170 | /* Rounds 32 through 47 (four rounds at a time). */ | 171 | /* Rounds 32 through 47 (four rounds at a time). */ |
| 171 | sha1_message_schedule_update(w0, w1, w2, w3) | 172 | sha1_message_schedule_update(w0, w1, w2, w3) |
| @@ -173,10 +174,10 @@ sha1_block_ce: | |||
| 173 | sha1_message_schedule_update(w2, w3, w0, w1) | 174 | sha1_message_schedule_update(w2, w3, w0, w1) |
| 174 | sha1_message_schedule_update(w3, w0, w1, w2) | 175 | sha1_message_schedule_update(w3, w0, w1, w2) |
| 175 | 176 | ||
| 176 | sha1_round2(hs0, hs1s, w0, k1) | 177 | sha1_round2(hs0, hs1, w0, k1) |
| 177 | sha1_round2(hs0, hs1s, w1, k1) | 178 | sha1_round2(hs0, hs1, w1, k1) |
| 178 | sha1_round3(hs0, hs1s, w2, k2) | 179 | sha1_round3(hs0, hs1, w2, k2) |
| 179 | sha1_round3(hs0, hs1s, w3, k2) | 180 | sha1_round3(hs0, hs1, w3, k2) |
| 180 | 181 | ||
| 181 | /* Rounds 48 through 63 (four rounds at a time). */ | 182 | /* Rounds 48 through 63 (four rounds at a time). */ |
| 182 | sha1_message_schedule_update(w0, w1, w2, w3) | 183 | sha1_message_schedule_update(w0, w1, w2, w3) |
| @@ -184,10 +185,10 @@ sha1_block_ce: | |||
| 184 | sha1_message_schedule_update(w2, w3, w0, w1) | 185 | sha1_message_schedule_update(w2, w3, w0, w1) |
| 185 | sha1_message_schedule_update(w3, w0, w1, w2) | 186 | sha1_message_schedule_update(w3, w0, w1, w2) |
| 186 | 187 | ||
| 187 | sha1_round3(hs0, hs1s, w0, k2) | 188 | sha1_round3(hs0, hs1, w0, k2) |
| 188 | sha1_round3(hs0, hs1s, w1, k2) | 189 | sha1_round3(hs0, hs1, w1, k2) |
| 189 | sha1_round3(hs0, hs1s, w2, k2) | 190 | sha1_round3(hs0, hs1, w2, k2) |
| 190 | sha1_round4(hs0, hs1s, w3, k3) | 191 | sha1_round4(hs0, hs1, w3, k3) |
| 191 | 192 | ||
| 192 | /* Rounds 64 through 79 (four rounds at a time). */ | 193 | /* Rounds 64 through 79 (four rounds at a time). */ |
| 193 | sha1_message_schedule_update(w0, w1, w2, w3) | 194 | sha1_message_schedule_update(w0, w1, w2, w3) |
| @@ -195,10 +196,10 @@ sha1_block_ce: | |||
| 195 | sha1_message_schedule_update(w2, w3, w0, w1) | 196 | sha1_message_schedule_update(w2, w3, w0, w1) |
| 196 | sha1_message_schedule_update(w3, w0, w1, w2) | 197 | sha1_message_schedule_update(w3, w0, w1, w2) |
| 197 | 198 | ||
| 198 | sha1_round4(hs0, hs1s, w0, k3) | 199 | sha1_round4(hs0, hs1, w0, k3) |
| 199 | sha1_round4(hs0, hs1s, w1, k3) | 200 | sha1_round4(hs0, hs1, w1, k3) |
| 200 | sha1_round4(hs0, hs1s, w2, k3) | 201 | sha1_round4(hs0, hs1, w2, k3) |
| 201 | sha1_round4(hs0, hs1s, w3, k3) | 202 | sha1_round4(hs0, hs1, w3, k3) |
| 202 | 203 | ||
| 203 | /* Add intermediate state to hash state. */ | 204 | /* Add intermediate state to hash state. */ |
| 204 | add hc0.4s, hc0.4s, hs0.4s | 205 | add hc0.4s, hc0.4s, hs0.4s |
diff --git a/src/lib/libcrypto/sha/sha256_aarch64_ce.S b/src/lib/libcrypto/sha/sha256_aarch64_ce.S index 343f338390..8a26f91b06 100644 --- a/src/lib/libcrypto/sha/sha256_aarch64_ce.S +++ b/src/lib/libcrypto/sha/sha256_aarch64_ce.S | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: sha256_aarch64_ce.S,v 1.5 2026/01/24 14:20:52 jsing Exp $ */ | 1 | /* $OpenBSD: sha256_aarch64_ce.S,v 1.6 2026/01/25 08:22:17 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -54,6 +54,10 @@ | |||
| 54 | 54 | ||
| 55 | #define tmp0 v28 | 55 | #define tmp0 v28 |
| 56 | #define tmp1 v29 | 56 | #define tmp1 v29 |
| 57 | #define tmp1q q29 | ||
| 58 | |||
| 59 | #define v18q q18 | ||
| 60 | #define v19q q19 | ||
| 57 | 61 | ||
| 58 | /* | 62 | /* |
| 59 | * Update message schedule for m0 (W0:W1:W2:W3), using m1 (W4:W5:W6:W7), | 63 | * Update message schedule for m0 (W0:W1:W2:W3), using m1 (W4:W5:W6:W7), |
| @@ -74,9 +78,12 @@ | |||
| 74 | */ | 78 | */ |
| 75 | #define sha256_round(h0, h1, w, k) \ | 79 | #define sha256_round(h0, h1, w, k) \ |
| 76 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ | 80 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ |
| 77 | mov tmp1.4s, h0.4s; \ | 81 | mov tmp1.16b, h0.16b; \ |
| 78 | sha256h h0, h1, tmp0.4s; \ | 82 | sha256h h0##q, h1##q, tmp0.4s; \ |
| 79 | sha256h2 h1, tmp1, tmp0.4s | 83 | sha256h2 h1##q, tmp1##q, tmp0.4s |
| 84 | |||
| 85 | #define sha256_round_initial(h0, h1, w, k) \ | ||
| 86 | sha256_round(h0, h1, w, k) | ||
| 80 | 87 | ||
| 81 | #define sha256_round_update(h0, h1, m0, m1, m2, m3, k) \ | 88 | #define sha256_round_update(h0, h1, m0, m1, m2, m3, k) \ |
| 82 | sha256_message_schedule_update(m0, m1, m2, m3); \ | 89 | sha256_message_schedule_update(m0, m1, m2, m3); \ |
| @@ -109,8 +116,8 @@ sha256_block_ce: | |||
| 109 | mov k256, k256_base | 116 | mov k256, k256_base |
| 110 | 117 | ||
| 111 | /* Copy current hash state. */ | 118 | /* Copy current hash state. */ |
| 112 | mov hs0.4s, hc0.4s | 119 | mov hs0.16b, hc0.16b |
| 113 | mov hs1.4s, hc1.4s | 120 | mov hs1.16b, hc1.16b |
| 114 | 121 | ||
| 115 | /* Load and byte swap message schedule. */ | 122 | /* Load and byte swap message schedule. */ |
| 116 | ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64 | 123 | ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64 |
| @@ -122,10 +129,10 @@ sha256_block_ce: | |||
| 122 | /* Rounds 0 through 15 (four rounds at a time). */ | 129 | /* Rounds 0 through 15 (four rounds at a time). */ |
| 123 | ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64 | 130 | ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64 |
| 124 | 131 | ||
| 125 | sha256_round(hs0, hs1, w0, k0) | 132 | sha256_round_initial(hs0, hs1, w0, k0) |
| 126 | sha256_round(hs0, hs1, w1, k1) | 133 | sha256_round_initial(hs0, hs1, w1, k1) |
| 127 | sha256_round(hs0, hs1, w2, k2) | 134 | sha256_round_initial(hs0, hs1, w2, k2) |
| 128 | sha256_round(hs0, hs1, w3, k3) | 135 | sha256_round_initial(hs0, hs1, w3, k3) |
| 129 | 136 | ||
| 130 | /* Rounds 16 through 31 (four rounds at a time). */ | 137 | /* Rounds 16 through 31 (four rounds at a time). */ |
| 131 | ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64 | 138 | ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64 |
diff --git a/src/lib/libcrypto/sha/sha512_aarch64_ce.S b/src/lib/libcrypto/sha/sha512_aarch64_ce.S index bec56a49e5..6efe775ff5 100644 --- a/src/lib/libcrypto/sha/sha512_aarch64_ce.S +++ b/src/lib/libcrypto/sha/sha512_aarch64_ce.S | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: sha512_aarch64_ce.S,v 1.3 2026/01/17 06:31:45 jsing Exp $ */ | 1 | /* $OpenBSD: sha512_aarch64_ce.S,v 1.4 2026/01/25 08:22:17 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -83,9 +83,15 @@ | |||
| 83 | #define k7 v27 | 83 | #define k7 v27 |
| 84 | 84 | ||
| 85 | #define tmp0 v8 | 85 | #define tmp0 v8 |
| 86 | #define tmp0q q8 | ||
| 86 | #define tmp1 v9 | 87 | #define tmp1 v9 |
| 87 | #define tmp2 v18 | 88 | #define tmp2 v18 |
| 88 | 89 | ||
| 90 | #define v0q q0 | ||
| 91 | #define v1q q1 | ||
| 92 | #define v4q q4 | ||
| 93 | #define v6q q6 | ||
| 94 | |||
| 89 | /* | 95 | /* |
| 90 | * Update message schedule for m0 (W0:W1), using m1 (W2:W3), m4 (W8:W9), | 96 | * Update message schedule for m0 (W0:W1), using m1 (W2:W3), m4 (W8:W9), |
| 91 | * m5 (W10:W11) and m7 (W14:W15). The sha512su0 instruction computes the sigma0 | 97 | * m5 (W10:W11) and m7 (W14:W15). The sha512su0 instruction computes the sigma0 |
| @@ -141,9 +147,12 @@ | |||
| 141 | add h4.2d, h4.2d, h3.2d; /* W1:W0 += g:h */ \ | 147 | add h4.2d, h4.2d, h3.2d; /* W1:W0 += g:h */ \ |
| 142 | ext tmp0.16b, h2.16b, h3.16b, #8; /* f:g */ \ | 148 | ext tmp0.16b, h2.16b, h3.16b, #8; /* f:g */ \ |
| 143 | ext tmp1.16b, h1.16b, h2.16b, #8; /* d:e */ \ | 149 | ext tmp1.16b, h1.16b, h2.16b, #8; /* d:e */ \ |
| 144 | sha512h h4, tmp0, tmp1.2d; /* T1 */ \ | 150 | sha512h h4##q, tmp0##q, tmp1.2d; /* T1 */ \ |
| 145 | add h5.2d, h1.2d, h4.2d; /* c:d + T1 */ \ | 151 | add h5.2d, h1.2d, h4.2d; /* c:d + T1 */ \ |
| 146 | sha512h2 h4, h1, h0.2d; /* T1 + T2 */ | 152 | sha512h2 h4##q, h1##q, h0.2d; /* T1 + T2 */ |
| 153 | |||
| 154 | #define sha512_round_initial(h0, h1, h2, h3, h4, h5, w, k) \ | ||
| 155 | sha512_round(h0, h1, h2, h3, h4, h5, w, k) | ||
| 147 | 156 | ||
| 148 | #define sha512_round_update(h0, h1, h2, h3, h4, h5, m0, m1, m2, m3, m4, k) \ | 157 | #define sha512_round_update(h0, h1, h2, h3, h4, h5, m0, m1, m2, m3, m4, k) \ |
| 149 | sha512_message_schedule_update(m0, m1, m2, m3, m4) \ | 158 | sha512_message_schedule_update(m0, m1, m2, m3, m4) \ |
| @@ -181,10 +190,10 @@ sha512_block_ce: | |||
| 181 | mov k512, k512_base | 190 | mov k512, k512_base |
| 182 | 191 | ||
| 183 | /* Copy current hash state. */ | 192 | /* Copy current hash state. */ |
| 184 | mov hs0.2d, hc0.2d | 193 | mov hs0.16b, hc0.16b |
| 185 | mov hs1.2d, hc1.2d | 194 | mov hs1.16b, hc1.16b |
| 186 | mov hs2.2d, hc2.2d | 195 | mov hs2.16b, hc2.16b |
| 187 | mov hs3.2d, hc3.2d | 196 | mov hs3.16b, hc3.16b |
| 188 | 197 | ||
| 189 | /* Load and byte swap message schedule. */ | 198 | /* Load and byte swap message schedule. */ |
| 190 | ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64 | 199 | ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64 |
| @@ -203,14 +212,14 @@ sha512_block_ce: | |||
| 203 | ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64 | 212 | ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64 |
| 204 | ld1 {k4.2d, k5.2d, k6.2d, k7.2d}, [k512], #64 | 213 | ld1 {k4.2d, k5.2d, k6.2d, k7.2d}, [k512], #64 |
| 205 | 214 | ||
| 206 | sha512_round(hs0, hs1, hs2, hs3, hs4, hs5, w0, k0) | 215 | sha512_round_initial(hs0, hs1, hs2, hs3, hs4, hs5, w0, k0) |
| 207 | sha512_round(hs4, hs0, hs5, hs2, hs6, hs7, w1, k1) | 216 | sha512_round_initial(hs4, hs0, hs5, hs2, hs6, hs7, w1, k1) |
| 208 | sha512_round(hs6, hs4, hs7, hs5, hs1, hs3, w2, k2) | 217 | sha512_round_initial(hs6, hs4, hs7, hs5, hs1, hs3, w2, k2) |
| 209 | sha512_round(hs1, hs6, hs3, hs7, hs0, hs2, w3, k3) | 218 | sha512_round_initial(hs1, hs6, hs3, hs7, hs0, hs2, w3, k3) |
| 210 | sha512_round(hs0, hs1, hs2, hs3, hs4, hs5, w4, k4) | 219 | sha512_round_initial(hs0, hs1, hs2, hs3, hs4, hs5, w4, k4) |
| 211 | sha512_round(hs4, hs0, hs5, hs2, hs6, hs7, w5, k5) | 220 | sha512_round_initial(hs4, hs0, hs5, hs2, hs6, hs7, w5, k5) |
| 212 | sha512_round(hs6, hs4, hs7, hs5, hs1, hs3, w6, k6) | 221 | sha512_round_initial(hs6, hs4, hs7, hs5, hs1, hs3, w6, k6) |
| 213 | sha512_round(hs1, hs6, hs3, hs7, hs0, hs2, w7, k7) | 222 | sha512_round_initial(hs1, hs6, hs3, hs7, hs0, hs2, w7, k7) |
| 214 | 223 | ||
| 215 | /* Rounds 16 through 31 (two rounds at a time). */ | 224 | /* Rounds 16 through 31 (two rounds at a time). */ |
| 216 | ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64 | 225 | ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64 |
