summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author    jsing <>    2023-08-11 15:25:36 +0000
committer jsing <>    2023-08-11 15:25:36 +0000
commit    7a3dd68a4f6fdb4ee9becd43c30327999e64405c (patch)
tree      eb5d6cc5a362f5b5bae5441e8d0134cab87ecb2c
parent    7bf985b2585d3c60a607e8ff8e3c261213a8dde3 (diff)
download  openbsd-7a3dd68a4f6fdb4ee9becd43c30327999e64405c.tar.gz
          openbsd-7a3dd68a4f6fdb4ee9becd43c30327999e64405c.tar.bz2
          openbsd-7a3dd68a4f6fdb4ee9becd43c30327999e64405c.zip
Demacro sha256.
Replace macros with static inline functions, as well as writing out the variable rotations instead of trying to outsmart the compiler. Also pull the message schedule update up and complete it prior to commencement of the round. Also use rotate right, rather than transposed rotate left. Overall this is more readable and more closely follows the specification. On some platforms (e.g. aarch64) there is no notable change in performance, while on others there is a significant improvement (more than 25% on arm). ok miod@ tb@
-rw-r--r--src/lib/libcrypto/sha/sha256.c163
1 file changed, 114 insertions, 49 deletions
diff --git a/src/lib/libcrypto/sha/sha256.c b/src/lib/libcrypto/sha/sha256.c
index 231a5a058c..c5f56f13dc 100644
--- a/src/lib/libcrypto/sha/sha256.c
+++ b/src/lib/libcrypto/sha/sha256.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha256.c,v 1.28 2023/08/10 07:15:23 jsing Exp $ */ 1/* $OpenBSD: sha256.c,v 1.29 2023/08/11 15:25:36 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -106,36 +106,77 @@ static const SHA_LONG K256[64] = {
106 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL, 106 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL,
107}; 107};
108 108
109/* 109static inline SHA_LONG
110 * FIPS specification refers to right rotations, while our ROTATE macro 110Sigma0(SHA_LONG x)
111 * is left one. This is why you might notice that rotation coefficients 111{
112 * differ from those observed in FIPS document by 32-N... 112 return crypto_ror_u32(x, 2) ^ crypto_ror_u32(x, 13) ^
113 */ 113 crypto_ror_u32(x, 22);
114#define Sigma0(x) (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10)) 114}
115#define Sigma1(x) (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
116#define sigma0(x) (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
117#define sigma1(x) (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))
118 115
119#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) 116static inline SHA_LONG
120#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) 117Sigma1(SHA_LONG x)
118{
119 return crypto_ror_u32(x, 6) ^ crypto_ror_u32(x, 11) ^
120 crypto_ror_u32(x, 25);
121}
121 122
122#define ROUND_00_15(x, i, a, b, c, d, e, f, g, h) do { \ 123static inline SHA_LONG
123 T1 = x + h + Sigma1(e) + Ch(e, f, g) + K256[i]; \ 124sigma0(SHA_LONG x)
124 h = Sigma0(a) + Maj(a, b, c); \ 125{
125 d += T1; h += T1; } while (0) 126 return crypto_ror_u32(x, 7) ^ crypto_ror_u32(x, 18) ^ (x >> 3);
127}
126 128
127#define ROUND_16_63(i, a, b, c, d, e, f, g, h, X) do { \ 129static inline SHA_LONG
128 s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); \ 130sigma1(SHA_LONG x)
129 s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); \ 131{
130 T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \ 132 return crypto_ror_u32(x, 17) ^ crypto_ror_u32(x, 19) ^ (x >> 10);
131 ROUND_00_15(T1, i, a, b, c, d, e, f, g, h); } while (0) 133}
134
135static inline SHA_LONG
136Ch(SHA_LONG x, SHA_LONG y, SHA_LONG z)
137{
138 return (x & y) ^ (~x & z);
139}
140
141static inline SHA_LONG
142Maj(SHA_LONG x, SHA_LONG y, SHA_LONG z)
143{
144 return (x & y) ^ (x & z) ^ (y & z);
145}
146
147static inline void
148sha256_msg_schedule_update(SHA_LONG *W0, SHA_LONG W1,
149 SHA_LONG W9, SHA_LONG W14)
150{
151 *W0 = sigma1(W14) + W9 + sigma0(W1) + *W0;
152}
153
154static inline void
155sha256_round(SHA_LONG *a, SHA_LONG *b, SHA_LONG *c, SHA_LONG *d,
156 SHA_LONG *e, SHA_LONG *f, SHA_LONG *g, SHA_LONG *h,
157 SHA_LONG Kt, SHA_LONG Wt)
158{
159 SHA_LONG T1, T2;
160
161 T1 = *h + Sigma1(*e) + Ch(*e, *f, *g) + Kt + Wt;
162 T2 = Sigma0(*a) + Maj(*a, *b, *c);
163
164 *h = *g;
165 *g = *f;
166 *f = *e;
167 *e = *d + T1;
168 *d = *c;
169 *c = *b;
170 *b = *a;
171 *a = T1 + T2;
172}
132 173
133static void 174static void
134sha256_block_data_order(SHA256_CTX *ctx, const void *_in, size_t num) 175sha256_block_data_order(SHA256_CTX *ctx, const void *_in, size_t num)
135{ 176{
136 const uint8_t *in = _in; 177 const uint8_t *in = _in;
137 const SHA_LONG *in32; 178 const SHA_LONG *in32;
138 unsigned int a, b, c, d, e, f, g, h, s0, s1, T1; 179 SHA_LONG a, b, c, d, e, f, g, h;
139 SHA_LONG X[16]; 180 SHA_LONG X[16];
140 int i; 181 int i;
141 182
@@ -189,33 +230,57 @@ sha256_block_data_order(SHA256_CTX *ctx, const void *_in, size_t num)
189 } 230 }
190 in += SHA256_CBLOCK; 231 in += SHA256_CBLOCK;
191 232
192 ROUND_00_15(X[0], 0, a, b, c, d, e, f, g, h); 233 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[0], X[0]);
193 ROUND_00_15(X[1], 1, h, a, b, c, d, e, f, g); 234 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[1], X[1]);
194 ROUND_00_15(X[2], 2, g, h, a, b, c, d, e, f); 235 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[2], X[2]);
195 ROUND_00_15(X[3], 3, f, g, h, a, b, c, d, e); 236 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[3], X[3]);
196 ROUND_00_15(X[4], 4, e, f, g, h, a, b, c, d); 237 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[4], X[4]);
197 ROUND_00_15(X[5], 5, d, e, f, g, h, a, b, c); 238 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[5], X[5]);
198 ROUND_00_15(X[6], 6, c, d, e, f, g, h, a, b); 239 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[6], X[6]);
199 ROUND_00_15(X[7], 7, b, c, d, e, f, g, h, a); 240 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[7], X[7]);
200 241 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[8], X[8]);
201 ROUND_00_15(X[8], 8, a, b, c, d, e, f, g, h); 242 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[9], X[9]);
202 ROUND_00_15(X[9], 9, h, a, b, c, d, e, f, g); 243 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[10], X[10]);
203 ROUND_00_15(X[10], 10, g, h, a, b, c, d, e, f); 244 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[11], X[11]);
204 ROUND_00_15(X[11], 11, f, g, h, a, b, c, d, e); 245 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[12], X[12]);
205 ROUND_00_15(X[12], 12, e, f, g, h, a, b, c, d); 246 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[13], X[13]);
206 ROUND_00_15(X[13], 13, d, e, f, g, h, a, b, c); 247 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[14], X[14]);
207 ROUND_00_15(X[14], 14, c, d, e, f, g, h, a, b); 248 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[15], X[15]);
208 ROUND_00_15(X[15], 15, b, c, d, e, f, g, h, a); 249
209 250 for (i = 16; i < 64; i += 16) {
210 for (i = 16; i < 64; i += 8) { 251 sha256_msg_schedule_update(&X[0], X[1], X[9], X[14]);
211 ROUND_16_63(i + 0, a, b, c, d, e, f, g, h, X); 252 sha256_msg_schedule_update(&X[1], X[2], X[10], X[15]);
212 ROUND_16_63(i + 1, h, a, b, c, d, e, f, g, X); 253 sha256_msg_schedule_update(&X[2], X[3], X[11], X[0]);
213 ROUND_16_63(i + 2, g, h, a, b, c, d, e, f, X); 254 sha256_msg_schedule_update(&X[3], X[4], X[12], X[1]);
214 ROUND_16_63(i + 3, f, g, h, a, b, c, d, e, X); 255 sha256_msg_schedule_update(&X[4], X[5], X[13], X[2]);
215 ROUND_16_63(i + 4, e, f, g, h, a, b, c, d, X); 256 sha256_msg_schedule_update(&X[5], X[6], X[14], X[3]);
216 ROUND_16_63(i + 5, d, e, f, g, h, a, b, c, X); 257 sha256_msg_schedule_update(&X[6], X[7], X[15], X[4]);
217 ROUND_16_63(i + 6, c, d, e, f, g, h, a, b, X); 258 sha256_msg_schedule_update(&X[7], X[8], X[0], X[5]);
218 ROUND_16_63(i + 7, b, c, d, e, f, g, h, a, X); 259 sha256_msg_schedule_update(&X[8], X[9], X[1], X[6]);
260 sha256_msg_schedule_update(&X[9], X[10], X[2], X[7]);
261 sha256_msg_schedule_update(&X[10], X[11], X[3], X[8]);
262 sha256_msg_schedule_update(&X[11], X[12], X[4], X[9]);
263 sha256_msg_schedule_update(&X[12], X[13], X[5], X[10]);
264 sha256_msg_schedule_update(&X[13], X[14], X[6], X[11]);
265 sha256_msg_schedule_update(&X[14], X[15], X[7], X[12]);
266 sha256_msg_schedule_update(&X[15], X[0], X[8], X[13]);
267
268 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 0], X[0]);
269 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 1], X[1]);
270 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 2], X[2]);
271 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 3], X[3]);
272 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 4], X[4]);
273 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 5], X[5]);
274 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 6], X[6]);
275 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 7], X[7]);
276 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 8], X[8]);
277 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 9], X[9]);
278 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 10], X[10]);
279 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 11], X[11]);
280 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 12], X[12]);
281 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 13], X[13]);
282 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 14], X[14]);
283 sha256_round(&a, &b, &c, &d, &e, &f, &g, &h, K256[i + 15], X[15]);
219 } 284 }
220 285
221 ctx->h[0] += a; 286 ctx->h[0] += a;