author      jsing <>    2023-05-17 06:37:14 +0000
committer   jsing <>    2023-05-17 06:37:14 +0000
commit      6683fd06977fdaa02290993ccf540e097725fbcb (patch)
tree        1719bad5dfd26376b240d5f9e7a9ba134c117d2e /src/lib/libcrypto/sha
parent      b36a9f1697cdcaa5ea871949d17a1b748ed8b0a2 (diff)
download    openbsd-6683fd06977fdaa02290993ccf540e097725fbcb.tar.gz
            openbsd-6683fd06977fdaa02290993ccf540e097725fbcb.tar.bz2
            openbsd-6683fd06977fdaa02290993ccf540e097725fbcb.zip
Clean up alignment handling for SHA-512.
All assembly implementations are required to perform their own alignment
handling. In the case of the C implementation, on strict alignment
platforms, unaligned data will be copied into an aligned buffer. However,
most platforms then perform byte-by-byte reads (via the PULL64 macros).
Instead, remove SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA and move alignment
handling into sha512_block_data_order() - if the data is aligned then
simply perform 64 bit loads and do endian conversion via be64toh(). If the
data is unaligned then use memcpy() and be64toh() (in the form of
crypto_load_be64toh()). Overall this reduces complexity and can improve
performance (on aarch64 we get a ~10% performance gain with aligned input
and a ~1-2% gain on armv7), while the same movq/bswapq is still generated
for amd64 and movl/bswapl for i386.
ok tb@
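
For illustration only (not part of the commit), here is a minimal sketch of the
load strategy the message describes. It assumes be64toh() is available from
<endian.h>, and load_be64_unaligned() is a hypothetical stand-in for libcrypto's
internal crypto_load_be64toh() helper:

/*
 * Sketch of the SHA-512 message-block load strategy described above.
 * Not the committed libcrypto code: load_be64_unaligned() is a
 * hypothetical stand-in for crypto_load_be64toh(), and be64toh() is
 * assumed to come from <endian.h>.
 */
#include <endian.h>
#include <stdint.h>
#include <string.h>

static inline uint64_t
load_be64_unaligned(const void *src)
{
	uint64_t v;

	/* memcpy() avoids a misaligned access on strict alignment CPUs. */
	memcpy(&v, src, sizeof(v));

	return be64toh(v);
}

static void
load_block_be64(uint64_t X[16], const uint64_t *in)
{
	int i;

	if ((uintptr_t)in % sizeof(uint64_t) == 0) {
		/* Input is 64 bit aligned - load directly, then byte swap. */
		for (i = 0; i < 16; i++)
			X[i] = be64toh(in[i]);
	} else {
		/* Input is not 64 bit aligned - copy each word first. */
		for (i = 0; i < 16; i++)
			X[i] = load_be64_unaligned(&in[i]);
	}
}

On amd64 and i386 a compiler can typically fold either path into the same
movq/bswapq (or movl/bswapl) sequence mentioned above.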
Diffstat (limited to 'src/lib/libcrypto/sha')
-rw-r--r--  src/lib/libcrypto/sha/sha512.c  121
1 files changed, 49 insertions, 72 deletions
diff --git a/src/lib/libcrypto/sha/sha512.c b/src/lib/libcrypto/sha/sha512.c
index c0752bd2c7..c88ef057dd 100644
--- a/src/lib/libcrypto/sha/sha512.c
+++ b/src/lib/libcrypto/sha/sha512.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sha512.c,v 1.36 2023/05/16 07:04:57 jsing Exp $ */
+/* $OpenBSD: sha512.c,v 1.37 2023/05/17 06:37:14 jsing Exp $ */
 /* ====================================================================
  * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved.
  *
@@ -66,9 +66,8 @@
 
 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
 
-#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM)
-#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
-#endif
+/* Ensure that SHA_LONG64 and uint64_t are equivalent. */
+CTASSERT(sizeof(SHA_LONG64) == sizeof(uint64_t));
 
 #ifdef SHA512_ASM
 void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
@@ -118,31 +117,6 @@ static const SHA_LONG64 K512[80] = {
 	U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817),
 };
 
-#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
-# if defined(__x86_64) || defined(__x86_64__)
-# define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
-	asm ("bswapq %0" \
-	: "=r"(ret) \
-	: "0"(ret)); ret; })
-# elif (defined(__i386) || defined(__i386__))
-# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
-	unsigned int hi=p[0],lo=p[1]; \
-	asm ("bswapl %0; bswapl %1;" \
-	: "=r"(lo),"=r"(hi) \
-	: "0"(lo),"1"(hi)); \
-	((SHA_LONG64)hi)<<32|lo; })
-# endif
-#endif
-
-#ifndef PULL64
-#if BYTE_ORDER == BIG_ENDIAN
-#define PULL64(x) (x)
-#else
-#define B(x, j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
-#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
-#endif
-#endif
-
 #define ROTR(x, s) crypto_ror_u64(x, s)
 
 #define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
@@ -185,37 +159,60 @@ sha512_block_data_order(SHA512_CTX *ctx, const void *_in, size_t num)
 	g = ctx->h[6];
 	h = ctx->h[7];
 
-	X[0] = PULL64(in[0]);
+	if ((uintptr_t)in % sizeof(SHA_LONG64) == 0) {
+		/* Input is 64 bit aligned. */
+		X[0] = be64toh(in[0]);
+		X[1] = be64toh(in[1]);
+		X[2] = be64toh(in[2]);
+		X[3] = be64toh(in[3]);
+		X[4] = be64toh(in[4]);
+		X[5] = be64toh(in[5]);
+		X[6] = be64toh(in[6]);
+		X[7] = be64toh(in[7]);
+		X[8] = be64toh(in[8]);
+		X[9] = be64toh(in[9]);
+		X[10] = be64toh(in[10]);
+		X[11] = be64toh(in[11]);
+		X[12] = be64toh(in[12]);
+		X[13] = be64toh(in[13]);
+		X[14] = be64toh(in[14]);
+		X[15] = be64toh(in[15]);
+	} else {
+		/* Input is not 64 bit aligned. */
+		X[0] = crypto_load_be64toh(&in[0]);
+		X[1] = crypto_load_be64toh(&in[1]);
+		X[2] = crypto_load_be64toh(&in[2]);
+		X[3] = crypto_load_be64toh(&in[3]);
+		X[4] = crypto_load_be64toh(&in[4]);
+		X[5] = crypto_load_be64toh(&in[5]);
+		X[6] = crypto_load_be64toh(&in[6]);
+		X[7] = crypto_load_be64toh(&in[7]);
+		X[8] = crypto_load_be64toh(&in[8]);
+		X[9] = crypto_load_be64toh(&in[9]);
+		X[10] = crypto_load_be64toh(&in[10]);
+		X[11] = crypto_load_be64toh(&in[11]);
+		X[12] = crypto_load_be64toh(&in[12]);
+		X[13] = crypto_load_be64toh(&in[13]);
+		X[14] = crypto_load_be64toh(&in[14]);
+		X[15] = crypto_load_be64toh(&in[15]);
+	}
+	in += SHA_LBLOCK;
+
 	ROUND_00_15(0, a, b, c, d, e, f, g, h, X[0]);
-	X[1] = PULL64(in[1]);
 	ROUND_00_15(1, h, a, b, c, d, e, f, g, X[1]);
-	X[2] = PULL64(in[2]);
 	ROUND_00_15(2, g, h, a, b, c, d, e, f, X[2]);
-	X[3] = PULL64(in[3]);
 	ROUND_00_15(3, f, g, h, a, b, c, d, e, X[3]);
-	X[4] = PULL64(in[4]);
 	ROUND_00_15(4, e, f, g, h, a, b, c, d, X[4]);
-	X[5] = PULL64(in[5]);
 	ROUND_00_15(5, d, e, f, g, h, a, b, c, X[5]);
-	X[6] = PULL64(in[6]);
 	ROUND_00_15(6, c, d, e, f, g, h, a, b, X[6]);
-	X[7] = PULL64(in[7]);
 	ROUND_00_15(7, b, c, d, e, f, g, h, a, X[7]);
-	X[8] = PULL64(in[8]);
 	ROUND_00_15(8, a, b, c, d, e, f, g, h, X[8]);
-	X[9] = PULL64(in[9]);
 	ROUND_00_15(9, h, a, b, c, d, e, f, g, X[9]);
-	X[10] = PULL64(in[10]);
 	ROUND_00_15(10, g, h, a, b, c, d, e, f, X[10]);
-	X[11] = PULL64(in[11]);
 	ROUND_00_15(11, f, g, h, a, b, c, d, e, X[11]);
-	X[12] = PULL64(in[12]);
 	ROUND_00_15(12, e, f, g, h, a, b, c, d, X[12]);
-	X[13] = PULL64(in[13]);
 	ROUND_00_15(13, d, e, f, g, h, a, b, c, X[13]);
-	X[14] = PULL64(in[14]);
 	ROUND_00_15(14, c, d, e, f, g, h, a, b, X[14]);
-	X[15] = PULL64(in[15]);
 	ROUND_00_15(15, b, c, d, e, f, g, h, a, X[15]);
 
 	for (i = 16; i < 80; i += 16) {
@@ -245,8 +242,6 @@ sha512_block_data_order(SHA512_CTX *ctx, const void *_in, size_t num)
 	ctx->h[5] += f;
 	ctx->h[6] += g;
 	ctx->h[7] += h;
-
-		in += SHA_LBLOCK;
 	}
 }
 
@@ -323,21 +318,15 @@ SHA512_Init(SHA512_CTX *c)
 void
 SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
 {
-#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
-	if ((size_t)data % sizeof(c->u.d[0]) != 0) {
-		memcpy(c->u.p, data, sizeof(c->u.p));
-		data = c->u.p;
-	}
-#endif
 	sha512_block_data_order(c, data, 1);
 }
 
 int
 SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
 {
-	SHA_LONG64 l;
+	const unsigned char *data = _data;
 	unsigned char *p = c->u.p;
-	const unsigned char *data = (const unsigned char *)_data;
+	SHA_LONG64 l;
 
 	if (len == 0)
 		return 1;
@@ -366,22 +355,10 @@ SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
 	}
 
 	if (len >= sizeof(c->u)) {
-#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
-		if ((size_t)data % sizeof(c->u.d[0]) != 0) {
-			while (len >= sizeof(c->u)) {
-				memcpy(p, data, sizeof(c->u));
-				sha512_block_data_order(c, p, 1);
-				len -= sizeof(c->u);
-				data += sizeof(c->u);
-			}
-		} else
-#endif
-		{
-			sha512_block_data_order(c, data, len/sizeof(c->u));
-			data += len;
-			len %= sizeof(c->u);
-			data -= len;
-		}
+		sha512_block_data_order(c, data, len/sizeof(c->u));
+		data += len;
+		len %= sizeof(c->u);
+		data -= len;
 	}
 
 	if (len != 0) {