| author | jsing <> | 2026-01-25 08:22:17 +0000 |
|---|---|---|
| committer | jsing <> | 2026-01-25 08:22:17 +0000 |
| commit | c6ef8fc8d3285a55c002f52df2bb2df42b7734c0 (patch) | |
| tree | 541d7b395ea34307cec81109c20f473cbc5ea8fb /src | |
| parent | 1d4a03f3d650a577581aedee8cc799a3ad6668d8 (diff) | |
Make SHA aarch64 assembly build with gcc.
gcc is extremely fussy about register naming and insists on the q and s
register names for the ARM CE SHA instructions, even though these refer to
the same underlying register (LLVM simply figures it out). Work around this
by mapping registers to the required variant at the point of use and by
defining a handful of mappings between v registers and their alternate
names/views.
This is still somewhat ugly, but it appears to be one of the cleaner options
that will allow portable to enable SHA assembly on platforms that use gcc.
ok kenjiro@ tb@
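
The core of the workaround is visible in the sha1 hunks below: each SHA CE instruction pastes the register view it needs onto its operand with `##`, and a companion define supplies that view of the same v register. A stripped-down version of the resulting sha1 round macro (simplified from the diff; tmp0 and tmp1 are defined elsewhere in the file):

```
/* q/s views of the same physical registers (q18 aliases v18, s19
 * aliases v19); hs0q is the define this commit adds. */
#define hs0	v18
#define hs0q	q18
#define hs1	v19
#define hs1s	s19

/* h0##q and h1##s are pasted before any macro expansion, so a call such
 * as sha1_round1(hs0, hs1, w0, k0) yields hs0q/hs1s, which then expand
 * to q18/s19: the operand spellings gas insists on for sha1c/sha1h. */
#define sha1_round1(h0, h1, w, k)				\
	add	tmp0.4s, w.4s, k.4s;	/* Tt = Wt + Kt */	\
	mov	tmp1, h0.s[0];					\
	sha1c	h0##q, h1##s, tmp0.4s;				\
	sha1h	h1##s, tmp1
```

The callers change accordingly, passing hs1 rather than hs1s, since the macro now selects the required view itself.
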
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/lib/libcrypto/sha/sha1_aarch64_ce.S | 61 |
| -rw-r--r-- | src/lib/libcrypto/sha/sha256_aarch64_ce.S | 27 |
| -rw-r--r-- | src/lib/libcrypto/sha/sha512_aarch64_ce.S | 39 |
3 files changed, 72 insertions, 55 deletions
diff --git a/src/lib/libcrypto/sha/sha1_aarch64_ce.S b/src/lib/libcrypto/sha/sha1_aarch64_ce.S
index 853d467641..641500a1e5 100644
--- a/src/lib/libcrypto/sha/sha1_aarch64_ce.S
+++ b/src/lib/libcrypto/sha/sha1_aarch64_ce.S
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: sha1_aarch64_ce.S,v 1.4 2026/01/24 14:20:52 jsing Exp $ */ | 1 | /* $OpenBSD: sha1_aarch64_ce.S,v 1.5 2026/01/25 08:22:17 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -40,6 +40,7 @@ | |||
| 40 | #define hc1s s17 | 40 | #define hc1s s17 |
| 41 | 41 | ||
| 42 | #define hs0 v18 | 42 | #define hs0 v18 |
| | | 43 | #define hs0q q18 |
| 43 | #define hs1 v19 | 44 | #define hs1 v19 |
| 44 | #define hs1s s19 | 45 | #define hs1s s19 |
| 45 | 46 | ||
| @@ -76,26 +77,26 @@ | |||
| 76 | #define sha1_round1(h0, h1, w, k) \ | 77 | #define sha1_round1(h0, h1, w, k) \ |
| 77 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ | 78 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ |
| 78 | mov tmp1, h0.s[0]; \ | 79 | mov tmp1, h0.s[0]; \ |
| 79 | sha1c h0, h1, tmp0.4s; \ | 80 | sha1c h0##q, h1##s, tmp0.4s; \ |
| 80 | sha1h h1, tmp1 | 81 | sha1h h1##s, tmp1 |
| 81 | 82 | ||
| 82 | #define sha1_round2(h0, h1, w, k) \ | 83 | #define sha1_round2(h0, h1, w, k) \ |
| 83 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ | 84 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ |
| 84 | mov tmp1, h0.s[0]; \ | 85 | mov tmp1, h0.s[0]; \ |
| 85 | sha1p h0, h1, tmp0.4s; \ | 86 | sha1p h0##q, h1##s, tmp0.4s; \ |
| 86 | sha1h h1, tmp1 | 87 | sha1h h1##s, tmp1 |
| 87 | 88 | ||
| 88 | #define sha1_round3(h0, h1, w, k) \ | 89 | #define sha1_round3(h0, h1, w, k) \ |
| 89 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ | 90 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ |
| 90 | mov tmp1, h0.s[0]; \ | 91 | mov tmp1, h0.s[0]; \ |
| 91 | sha1m h0, h1, tmp0.4s; \ | 92 | sha1m h0##q, h1##s, tmp0.4s; \ |
| 92 | sha1h h1, tmp1 | 93 | sha1h h1##s, tmp1 |
| 93 | 94 | ||
| 94 | #define sha1_round4(h0, h1, w, k) \ | 95 | #define sha1_round4(h0, h1, w, k) \ |
| 95 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ | 96 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ |
| 96 | mov tmp1, h0.s[0]; \ | 97 | mov tmp1, h0.s[0]; \ |
| 97 | sha1p h0, h1, tmp0.4s; \ | 98 | sha1p h0##q, h1##s, tmp0.4s; \ |
| 98 | sha1h h1, tmp1 | 99 | sha1h h1##s, tmp1 |
| 99 | 100 | ||
| 100 | .arch armv8-a+sha2 | 101 | .arch armv8-a+sha2 |
| 101 | 102 | ||
| @@ -140,7 +141,7 @@ sha1_block_ce: | |||
| 140 | 141 | ||
| 141 | .Lblock_loop: | 142 | .Lblock_loop: |
| 142 | /* Copy current hash state. */ | 143 | /* Copy current hash state. */ |
| 143 | mov hs0.4s, hc0.4s | 144 | mov hs0.16b, hc0.16b |
| 144 | mov hs1s, hc1.s[0] | 145 | mov hs1s, hc1.s[0] |
| 145 | 146 | ||
| 146 | /* Load and byte swap message schedule. */ | 147 | /* Load and byte swap message schedule. */ |
| @@ -151,10 +152,10 @@ sha1_block_ce: | |||
| 151 | rev32 w3.16b, w3.16b | 152 | rev32 w3.16b, w3.16b |
| 152 | 153 | ||
| 153 | /* Rounds 0 through 15 (four rounds at a time). */ | 154 | /* Rounds 0 through 15 (four rounds at a time). */ |
| 154 | sha1_round1(hs0, hs1s, w0, k0) | 155 | sha1_round1(hs0, hs1, w0, k0) |
| 155 | sha1_round1(hs0, hs1s, w1, k0) | 156 | sha1_round1(hs0, hs1, w1, k0) |
| 156 | sha1_round1(hs0, hs1s, w2, k0) | 157 | sha1_round1(hs0, hs1, w2, k0) |
| 157 | sha1_round1(hs0, hs1s, w3, k0) | 158 | sha1_round1(hs0, hs1, w3, k0) |
| 158 | 159 | ||
| 159 | /* Rounds 16 through 31 (four rounds at a time). */ | 160 | /* Rounds 16 through 31 (four rounds at a time). */ |
| 160 | sha1_message_schedule_update(w0, w1, w2, w3) | 161 | sha1_message_schedule_update(w0, w1, w2, w3) |
| @@ -162,10 +163,10 @@ sha1_block_ce: | |||
| 162 | sha1_message_schedule_update(w2, w3, w0, w1) | 163 | sha1_message_schedule_update(w2, w3, w0, w1) |
| 163 | sha1_message_schedule_update(w3, w0, w1, w2) | 164 | sha1_message_schedule_update(w3, w0, w1, w2) |
| 164 | 165 | ||
| 165 | sha1_round1(hs0, hs1s, w0, k0) | 166 | sha1_round1(hs0, hs1, w0, k0) |
| 166 | sha1_round2(hs0, hs1s, w1, k1) | 167 | sha1_round2(hs0, hs1, w1, k1) |
| 167 | sha1_round2(hs0, hs1s, w2, k1) | 168 | sha1_round2(hs0, hs1, w2, k1) |
| 168 | sha1_round2(hs0, hs1s, w3, k1) | 169 | sha1_round2(hs0, hs1, w3, k1) |
| 169 | 170 | ||
| 170 | /* Rounds 32 through 47 (four rounds at a time). */ | 171 | /* Rounds 32 through 47 (four rounds at a time). */ |
| 171 | sha1_message_schedule_update(w0, w1, w2, w3) | 172 | sha1_message_schedule_update(w0, w1, w2, w3) |
| @@ -173,10 +174,10 @@ sha1_block_ce: | |||
| 173 | sha1_message_schedule_update(w2, w3, w0, w1) | 174 | sha1_message_schedule_update(w2, w3, w0, w1) |
| 174 | sha1_message_schedule_update(w3, w0, w1, w2) | 175 | sha1_message_schedule_update(w3, w0, w1, w2) |
| 175 | 176 | ||
| 176 | sha1_round2(hs0, hs1s, w0, k1) | 177 | sha1_round2(hs0, hs1, w0, k1) |
| 177 | sha1_round2(hs0, hs1s, w1, k1) | 178 | sha1_round2(hs0, hs1, w1, k1) |
| 178 | sha1_round3(hs0, hs1s, w2, k2) | 179 | sha1_round3(hs0, hs1, w2, k2) |
| 179 | sha1_round3(hs0, hs1s, w3, k2) | 180 | sha1_round3(hs0, hs1, w3, k2) |
| 180 | 181 | ||
| 181 | /* Rounds 48 through 63 (four rounds at a time). */ | 182 | /* Rounds 48 through 63 (four rounds at a time). */ |
| 182 | sha1_message_schedule_update(w0, w1, w2, w3) | 183 | sha1_message_schedule_update(w0, w1, w2, w3) |
| @@ -184,10 +185,10 @@ sha1_block_ce: | |||
| 184 | sha1_message_schedule_update(w2, w3, w0, w1) | 185 | sha1_message_schedule_update(w2, w3, w0, w1) |
| 185 | sha1_message_schedule_update(w3, w0, w1, w2) | 186 | sha1_message_schedule_update(w3, w0, w1, w2) |
| 186 | 187 | ||
| 187 | sha1_round3(hs0, hs1s, w0, k2) | 188 | sha1_round3(hs0, hs1, w0, k2) |
| 188 | sha1_round3(hs0, hs1s, w1, k2) | 189 | sha1_round3(hs0, hs1, w1, k2) |
| 189 | sha1_round3(hs0, hs1s, w2, k2) | 190 | sha1_round3(hs0, hs1, w2, k2) |
| 190 | sha1_round4(hs0, hs1s, w3, k3) | 191 | sha1_round4(hs0, hs1, w3, k3) |
| 191 | 192 | ||
| 192 | /* Rounds 64 through 79 (four rounds at a time). */ | 193 | /* Rounds 64 through 79 (four rounds at a time). */ |
| 193 | sha1_message_schedule_update(w0, w1, w2, w3) | 194 | sha1_message_schedule_update(w0, w1, w2, w3) |
| @@ -195,10 +196,10 @@ sha1_block_ce: | |||
| 195 | sha1_message_schedule_update(w2, w3, w0, w1) | 196 | sha1_message_schedule_update(w2, w3, w0, w1) |
| 196 | sha1_message_schedule_update(w3, w0, w1, w2) | 197 | sha1_message_schedule_update(w3, w0, w1, w2) |
| 197 | 198 | ||
| 198 | sha1_round4(hs0, hs1s, w0, k3) | 199 | sha1_round4(hs0, hs1, w0, k3) |
| 199 | sha1_round4(hs0, hs1s, w1, k3) | 200 | sha1_round4(hs0, hs1, w1, k3) |
| 200 | sha1_round4(hs0, hs1s, w2, k3) | 201 | sha1_round4(hs0, hs1, w2, k3) |
| 201 | sha1_round4(hs0, hs1s, w3, k3) | 202 | sha1_round4(hs0, hs1, w3, k3) |
| 202 | 203 | ||
| 203 | /* Add intermediate state to hash state. */ | 204 | /* Add intermediate state to hash state. */ |
| 204 | add hc0.4s, hc0.4s, hs0.4s | 205 | add hc0.4s, hc0.4s, hs0.4s |
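
One part of the diff the commit message does not call out: the hash-state copies change from `mov hs0.4s, hc0.4s` to `mov hs0.16b, hc0.16b` (and likewise for the .2d spellings in sha512). The MOV (vector) form is an alias of ORR (vector, register), which is defined only for the 8b and 16b arrangements, so the stricter assembler rejects the element-typed spellings; the .16b form is the same whole-register copy:

```
	/* Whole-register copy of the hash state; identical effect, but the
	 * 16b arrangement is the one the MOV/ORR (vector) alias defines. */
	mov	hs0.16b, hc0.16b	/* was: mov hs0.4s, hc0.4s */
```
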
diff --git a/src/lib/libcrypto/sha/sha256_aarch64_ce.S b/src/lib/libcrypto/sha/sha256_aarch64_ce.S
index 343f338390..8a26f91b06 100644
--- a/src/lib/libcrypto/sha/sha256_aarch64_ce.S
+++ b/src/lib/libcrypto/sha/sha256_aarch64_ce.S
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: sha256_aarch64_ce.S,v 1.5 2026/01/24 14:20:52 jsing Exp $ */ | 1 | /* $OpenBSD: sha256_aarch64_ce.S,v 1.6 2026/01/25 08:22:17 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -54,6 +54,10 @@ | |||
| 54 | 54 | ||
| 55 | #define tmp0 v28 | 55 | #define tmp0 v28 |
| 56 | #define tmp1 v29 | 56 | #define tmp1 v29 |
| | | 57 | #define tmp1q q29 |
| | | 58 | |
| | | 59 | #define v18q q18 |
| | | 60 | #define v19q q19 |
| 57 | 61 | ||
| 58 | /* | 62 | /* |
| 59 | * Update message schedule for m0 (W0:W1:W2:W3), using m1 (W4:W5:W6:W7), | 63 | * Update message schedule for m0 (W0:W1:W2:W3), using m1 (W4:W5:W6:W7), |
| @@ -74,9 +78,12 @@ | |||
| 74 | */ | 78 | */ |
| 75 | #define sha256_round(h0, h1, w, k) \ | 79 | #define sha256_round(h0, h1, w, k) \ |
| 76 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ | 80 | add tmp0.4s, w.4s, k.4s; /* Tt = Wt + Kt */ \ |
| 77 | mov tmp1.4s, h0.4s; \ | 81 | mov tmp1.16b, h0.16b; \ |
| 78 | sha256h h0, h1, tmp0.4s; \ | 82 | sha256h h0##q, h1##q, tmp0.4s; \ |
| 79 | sha256h2 h1, tmp1, tmp0.4s | 83 | sha256h2 h1##q, tmp1##q, tmp0.4s |
| 84 | |||
| 85 | #define sha256_round_initial(h0, h1, w, k) \ | ||
| 86 | sha256_round(h0, h1, w, k) | ||
| 80 | 87 | ||
| 81 | #define sha256_round_update(h0, h1, m0, m1, m2, m3, k) \ | 88 | #define sha256_round_update(h0, h1, m0, m1, m2, m3, k) \ |
| 82 | sha256_message_schedule_update(m0, m1, m2, m3); \ | 89 | sha256_message_schedule_update(m0, m1, m2, m3); \ |
| @@ -109,8 +116,8 @@ sha256_block_ce: | |||
| 109 | mov k256, k256_base | 116 | mov k256, k256_base |
| 110 | 117 | ||
| 111 | /* Copy current hash state. */ | 118 | /* Copy current hash state. */ |
| 112 | mov hs0.4s, hc0.4s | 119 | mov hs0.16b, hc0.16b |
| 113 | mov hs1.4s, hc1.4s | 120 | mov hs1.16b, hc1.16b |
| 114 | 121 | ||
| 115 | /* Load and byte swap message schedule. */ | 122 | /* Load and byte swap message schedule. */ |
| 116 | ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64 | 123 | ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64 |
| @@ -122,10 +129,10 @@ sha256_block_ce: | |||
| 122 | /* Rounds 0 through 15 (four rounds at a time). */ | 129 | /* Rounds 0 through 15 (four rounds at a time). */ |
| 123 | ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64 | 130 | ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64 |
| 124 | 131 | ||
| 125 | sha256_round(hs0, hs1, w0, k0) | 132 | sha256_round_initial(hs0, hs1, w0, k0) |
| 126 | sha256_round(hs0, hs1, w1, k1) | 133 | sha256_round_initial(hs0, hs1, w1, k1) |
| 127 | sha256_round(hs0, hs1, w2, k2) | 134 | sha256_round_initial(hs0, hs1, w2, k2) |
| 128 | sha256_round(hs0, hs1, w3, k3) | 135 | sha256_round_initial(hs0, hs1, w3, k3) |
| 129 | 136 | ||
| 130 | /* Rounds 16 through 31 (four rounds at a time). */ | 137 | /* Rounds 16 through 31 (four rounds at a time). */ |
| 131 | ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64 | 138 | ld1 {k0.4s, k1.4s, k2.4s, k3.4s}, [k256], #64 |
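
The sha256_round_initial wrapper above looks like a no-op, but it exploits standard C preprocessor expansion order. Arguments pasted with `##` are not macro-expanded first, so calling sha256_round(hs0, hs1, ...) directly would paste hs0##q into hs0q, which has no definition; routing the first rounds through one extra macro layer expands hs0/hs1 before sha256_round sees them, and the pasted result then resolves through the new v18q/v19q defines. A trimmed sketch of the expansion, assuming hs0 and hs1 are v18 and v19 (implied by the new defines, not shown in these hunks; w0/k0 are the message and constant registers defined elsewhere in the file):

```
/* Defines taken from the diff; hs0/hs1 assumed to be v18/v19. */
#define hs0	v18
#define hs1	v19
#define tmp0	v28
#define tmp1	v29
#define tmp1q	q29
#define v18q	q18
#define v19q	q19

#define sha256_round(h0, h1, w, k)				\
	add	tmp0.4s, w.4s, k.4s;				\
	mov	tmp1.16b, h0.16b;				\
	sha256h	h0##q, h1##q, tmp0.4s;				\
	sha256h2 h1##q, tmp1##q, tmp0.4s

/* The extra layer: h0/h1 are not adjacent to ## here, so hs0/hs1 are
 * expanded to v18/v19 before they reach sha256_round. */
#define sha256_round_initial(h0, h1, w, k)			\
	sha256_round(h0, h1, w, k)

	sha256_round_initial(hs0, hs1, w0, k0)
	/* -> sha256_round(v18, v19, ...): h0##q pastes v18q, which then
	 *    expands to q18; tmp1##q pastes tmp1q, which expands to q29. */
```

The sha512_round_initial wrapper and the v0q/v1q/v4q/v6q and tmp0q defines in the next file follow the same pattern for the sha512h/sha512h2 operands.
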
diff --git a/src/lib/libcrypto/sha/sha512_aarch64_ce.S b/src/lib/libcrypto/sha/sha512_aarch64_ce.S
index bec56a49e5..6efe775ff5 100644
--- a/src/lib/libcrypto/sha/sha512_aarch64_ce.S
+++ b/src/lib/libcrypto/sha/sha512_aarch64_ce.S
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: sha512_aarch64_ce.S,v 1.3 2026/01/17 06:31:45 jsing Exp $ */ | 1 | /* $OpenBSD: sha512_aarch64_ce.S,v 1.4 2026/01/25 08:22:17 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023,2025 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -83,9 +83,15 @@ | |||
| 83 | #define k7 v27 | 83 | #define k7 v27 |
| 84 | 84 | ||
| 85 | #define tmp0 v8 | 85 | #define tmp0 v8 |
| | | 86 | #define tmp0q q8 |
| 86 | #define tmp1 v9 | 87 | #define tmp1 v9 |
| 87 | #define tmp2 v18 | 88 | #define tmp2 v18 |
| 88 | 89 | ||
| | | 90 | #define v0q q0 |
| | | 91 | #define v1q q1 |
| | | 92 | #define v4q q4 |
| | | 93 | #define v6q q6 |
| | | 94 | |
| 89 | /* | 95 | /* |
| 90 | * Update message schedule for m0 (W0:W1), using m1 (W2:W3), m4 (W8:W9), | 96 | * Update message schedule for m0 (W0:W1), using m1 (W2:W3), m4 (W8:W9), |
| 91 | * m5 (W10:W11) and m7 (W14:W15). The sha512su0 instruction computes the sigma0 | 97 | * m5 (W10:W11) and m7 (W14:W15). The sha512su0 instruction computes the sigma0 |
| @@ -141,9 +147,12 @@ | |||
| 141 | add h4.2d, h4.2d, h3.2d; /* W1:W0 += g:h */ \ | 147 | add h4.2d, h4.2d, h3.2d; /* W1:W0 += g:h */ \ |
| 142 | ext tmp0.16b, h2.16b, h3.16b, #8; /* f:g */ \ | 148 | ext tmp0.16b, h2.16b, h3.16b, #8; /* f:g */ \ |
| 143 | ext tmp1.16b, h1.16b, h2.16b, #8; /* d:e */ \ | 149 | ext tmp1.16b, h1.16b, h2.16b, #8; /* d:e */ \ |
| 144 | sha512h h4, tmp0, tmp1.2d; /* T1 */ \ | 150 | sha512h h4##q, tmp0##q, tmp1.2d; /* T1 */ \ |
| 145 | add h5.2d, h1.2d, h4.2d; /* c:d + T1 */ \ | 151 | add h5.2d, h1.2d, h4.2d; /* c:d + T1 */ \ |
| 146 | sha512h2 h4, h1, h0.2d; /* T1 + T2 */ | 152 | sha512h2 h4##q, h1##q, h0.2d; /* T1 + T2 */ |
| 153 | |||
| 154 | #define sha512_round_initial(h0, h1, h2, h3, h4, h5, w, k) \ | ||
| 155 | sha512_round(h0, h1, h2, h3, h4, h5, w, k) | ||
| 147 | 156 | ||
| 148 | #define sha512_round_update(h0, h1, h2, h3, h4, h5, m0, m1, m2, m3, m4, k) \ | 157 | #define sha512_round_update(h0, h1, h2, h3, h4, h5, m0, m1, m2, m3, m4, k) \ |
| 149 | sha512_message_schedule_update(m0, m1, m2, m3, m4) \ | 158 | sha512_message_schedule_update(m0, m1, m2, m3, m4) \ |
| @@ -181,10 +190,10 @@ sha512_block_ce: | |||
| 181 | mov k512, k512_base | 190 | mov k512, k512_base |
| 182 | 191 | ||
| 183 | /* Copy current hash state. */ | 192 | /* Copy current hash state. */ |
| 184 | mov hs0.2d, hc0.2d | 193 | mov hs0.16b, hc0.16b |
| 185 | mov hs1.2d, hc1.2d | 194 | mov hs1.16b, hc1.16b |
| 186 | mov hs2.2d, hc2.2d | 195 | mov hs2.16b, hc2.16b |
| 187 | mov hs3.2d, hc3.2d | 196 | mov hs3.16b, hc3.16b |
| 188 | 197 | ||
| 189 | /* Load and byte swap message schedule. */ | 198 | /* Load and byte swap message schedule. */ |
| 190 | ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64 | 199 | ld1 {w0.16b, w1.16b, w2.16b, w3.16b}, [in], #64 |
| @@ -203,14 +212,14 @@ sha512_block_ce: | |||
| 203 | ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64 | 212 | ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64 |
| 204 | ld1 {k4.2d, k5.2d, k6.2d, k7.2d}, [k512], #64 | 213 | ld1 {k4.2d, k5.2d, k6.2d, k7.2d}, [k512], #64 |
| 205 | 214 | ||
| 206 | sha512_round(hs0, hs1, hs2, hs3, hs4, hs5, w0, k0) | 215 | sha512_round_initial(hs0, hs1, hs2, hs3, hs4, hs5, w0, k0) |
| 207 | sha512_round(hs4, hs0, hs5, hs2, hs6, hs7, w1, k1) | 216 | sha512_round_initial(hs4, hs0, hs5, hs2, hs6, hs7, w1, k1) |
| 208 | sha512_round(hs6, hs4, hs7, hs5, hs1, hs3, w2, k2) | 217 | sha512_round_initial(hs6, hs4, hs7, hs5, hs1, hs3, w2, k2) |
| 209 | sha512_round(hs1, hs6, hs3, hs7, hs0, hs2, w3, k3) | 218 | sha512_round_initial(hs1, hs6, hs3, hs7, hs0, hs2, w3, k3) |
| 210 | sha512_round(hs0, hs1, hs2, hs3, hs4, hs5, w4, k4) | 219 | sha512_round_initial(hs0, hs1, hs2, hs3, hs4, hs5, w4, k4) |
| 211 | sha512_round(hs4, hs0, hs5, hs2, hs6, hs7, w5, k5) | 220 | sha512_round_initial(hs4, hs0, hs5, hs2, hs6, hs7, w5, k5) |
| 212 | sha512_round(hs6, hs4, hs7, hs5, hs1, hs3, w6, k6) | 221 | sha512_round_initial(hs6, hs4, hs7, hs5, hs1, hs3, w6, k6) |
| 213 | sha512_round(hs1, hs6, hs3, hs7, hs0, hs2, w7, k7) | 222 | sha512_round_initial(hs1, hs6, hs3, hs7, hs0, hs2, w7, k7) |
| 214 | 223 | ||
| 215 | /* Rounds 16 through 31 (two rounds at a time). */ | 224 | /* Rounds 16 through 31 (two rounds at a time). */ |
| 216 | ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64 | 225 | ld1 {k0.2d, k1.2d, k2.2d, k3.2d}, [k512], #64 |
