diff options
author | jsing <> | 2023-05-17 06:37:14 +0000 |
---|---|---|
committer | jsing <> | 2023-05-17 06:37:14 +0000 |
commit | 6683fd06977fdaa02290993ccf540e097725fbcb (patch) | |
tree | 1719bad5dfd26376b240d5f9e7a9ba134c117d2e /src/lib | |
parent | b36a9f1697cdcaa5ea871949d17a1b748ed8b0a2 (diff) | |
download | openbsd-6683fd06977fdaa02290993ccf540e097725fbcb.tar.gz openbsd-6683fd06977fdaa02290993ccf540e097725fbcb.tar.bz2 openbsd-6683fd06977fdaa02290993ccf540e097725fbcb.zip |
Clean up alignment handling for SHA-512.
All assembly implementations are required to perform their own alignment
handling. In the case of the C implementation, on strict alignment
platforms, unaligned data will be copied into an aligned buffer. However,
most platforms then perform byte-by-byte reads (via the PULL64 macros).
Instead, remove SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA and move alignment
handling to sha512_block_data_order() - if the data is aligned then simply
perform 64 bit loads and then do endian conversion via be64toh(). If the
data is unaligned then use memcpy() and be64toh() (in the form of
crypto_load_be64toh()). Overall this reduces complexity and can improve
performance (on aarch64 we get a ~10% performance gain with aligned input
and a ~1-2% gain on armv7), while the same movq/bswapq is generated
for amd64 and movl/bswapl for i386.
ok tb@
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/libcrypto/crypto_internal.h | 55 | ||||
-rw-r--r-- | src/lib/libcrypto/sha/sha512.c | 121 |
2 files changed, 95 insertions, 81 deletions
diff --git a/src/lib/libcrypto/crypto_internal.h b/src/lib/libcrypto/crypto_internal.h index 24a06256db..2e6ab82692 100644 --- a/src/lib/libcrypto/crypto_internal.h +++ b/src/lib/libcrypto/crypto_internal.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: crypto_internal.h,v 1.3 2023/04/14 10:42:51 jsing Exp $ */ | 1 | /* $OpenBSD: crypto_internal.h,v 1.4 2023/05/17 06:37:14 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -22,14 +22,34 @@ | |||
22 | #ifndef HEADER_CRYPTO_INTERNAL_H | 22 | #ifndef HEADER_CRYPTO_INTERNAL_H |
23 | #define HEADER_CRYPTO_INTERNAL_H | 23 | #define HEADER_CRYPTO_INTERNAL_H |
24 | 24 | ||
25 | #define CTASSERT(x) \ | ||
26 | extern char _ctassert[(x) ? 1 : -1] __attribute__((__unused__)) | ||
27 | |||
28 | /* | ||
29 | * crypto_load_be32toh() loads a 32 bit unsigned big endian value as a 32 bit | ||
30 | * unsigned host endian value, from the specified address in memory. The memory | ||
31 | * address may have any alignment. | ||
32 | */ | ||
33 | #ifndef HAVE_CRYPTO_LOAD_BE32TOH | ||
34 | static inline uint32_t | ||
35 | crypto_load_be32toh(const void *src) | ||
36 | { | ||
37 | uint32_t v; | ||
38 | |||
39 | memcpy(&v, src, sizeof(v)); | ||
40 | |||
41 | return be32toh(v); | ||
42 | } | ||
43 | #endif | ||
44 | |||
25 | /* | 45 | /* |
26 | * crypto_store_htobe32() stores a 32 bit unsigned host endian value | 46 | * crypto_store_htobe32() stores a 32 bit unsigned host endian value as a 32 bit |
27 | * as a 32 bit unsigned big endian value, at the specified location in | 47 | * unsigned big endian value, at the specified address in memory. The memory |
28 | * memory. The memory location may have any alignment. | 48 | * address may have any alignment. |
29 | */ | 49 | */ |
30 | #ifndef HAVE_CRYPTO_STORE_HTOBE32 | 50 | #ifndef HAVE_CRYPTO_STORE_HTOBE32 |
31 | static inline void | 51 | static inline void |
32 | crypto_store_htobe32(uint8_t *dst, uint32_t v) | 52 | crypto_store_htobe32(void *dst, uint32_t v) |
33 | { | 53 | { |
34 | v = htobe32(v); | 54 | v = htobe32(v); |
35 | memcpy(dst, &v, sizeof(v)); | 55 | memcpy(dst, &v, sizeof(v)); |
@@ -37,13 +57,30 @@ crypto_store_htobe32(uint8_t *dst, uint32_t v) | |||
37 | #endif | 57 | #endif |
38 | 58 | ||
39 | /* | 59 | /* |
40 | * crypto_store_htobe64() stores a 64 bit unsigned host endian value | 60 | * crypto_load_be64toh() loads a 64 bit unsigned big endian value as a 64 bit |
41 | * as a 64 bit unsigned big endian value, at the specified location in | 61 | * unsigned host endian value, from the specified address in memory. The memory |
42 | * memory. The memory location may have any alignment. | 62 | * address may have any alignment. |
63 | */ | ||
64 | #ifndef HAVE_CRYPTO_LOAD_BE64TOH | ||
65 | static inline uint64_t | ||
66 | crypto_load_be64toh(const void *src) | ||
67 | { | ||
68 | uint64_t v; | ||
69 | |||
70 | memcpy(&v, src, sizeof(v)); | ||
71 | |||
72 | return be64toh(v); | ||
73 | } | ||
74 | #endif | ||
75 | |||
76 | /* | ||
77 | * crypto_store_htobe64() stores a 64 bit unsigned host endian value as a 64 bit | ||
78 | * unsigned big endian value, at the specified address in memory. The memory | ||
79 | * address may have any alignment. | ||
43 | */ | 80 | */ |
44 | #ifndef HAVE_CRYPTO_STORE_HTOBE64 | 81 | #ifndef HAVE_CRYPTO_STORE_HTOBE64 |
45 | static inline void | 82 | static inline void |
46 | crypto_store_htobe64(uint8_t *dst, uint64_t v) | 83 | crypto_store_htobe64(void *dst, uint64_t v) |
47 | { | 84 | { |
48 | v = htobe64(v); | 85 | v = htobe64(v); |
49 | memcpy(dst, &v, sizeof(v)); | 86 | memcpy(dst, &v, sizeof(v)); |
diff --git a/src/lib/libcrypto/sha/sha512.c b/src/lib/libcrypto/sha/sha512.c index c0752bd2c7..c88ef057dd 100644 --- a/src/lib/libcrypto/sha/sha512.c +++ b/src/lib/libcrypto/sha/sha512.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: sha512.c,v 1.36 2023/05/16 07:04:57 jsing Exp $ */ | 1 | /* $OpenBSD: sha512.c,v 1.37 2023/05/17 06:37:14 jsing Exp $ */ |
2 | /* ==================================================================== | 2 | /* ==================================================================== |
3 | * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. |
4 | * | 4 | * |
@@ -66,9 +66,8 @@ | |||
66 | 66 | ||
67 | #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512) | 67 | #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512) |
68 | 68 | ||
69 | #if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM) | 69 | /* Ensure that SHA_LONG64 and uint64_t are equivalent. */ |
70 | #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA | 70 | CTASSERT(sizeof(SHA_LONG64) == sizeof(uint64_t)); |
71 | #endif | ||
72 | 71 | ||
73 | #ifdef SHA512_ASM | 72 | #ifdef SHA512_ASM |
74 | void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num); | 73 | void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num); |
@@ -118,31 +117,6 @@ static const SHA_LONG64 K512[80] = { | |||
118 | U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817), | 117 | U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817), |
119 | }; | 118 | }; |
120 | 119 | ||
121 | #if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) | ||
122 | # if defined(__x86_64) || defined(__x86_64__) | ||
123 | # define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \ | ||
124 | asm ("bswapq %0" \ | ||
125 | : "=r"(ret) \ | ||
126 | : "0"(ret)); ret; }) | ||
127 | # elif (defined(__i386) || defined(__i386__)) | ||
128 | # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\ | ||
129 | unsigned int hi=p[0],lo=p[1]; \ | ||
130 | asm ("bswapl %0; bswapl %1;" \ | ||
131 | : "=r"(lo),"=r"(hi) \ | ||
132 | : "0"(lo),"1"(hi)); \ | ||
133 | ((SHA_LONG64)hi)<<32|lo; }) | ||
134 | # endif | ||
135 | #endif | ||
136 | |||
137 | #ifndef PULL64 | ||
138 | #if BYTE_ORDER == BIG_ENDIAN | ||
139 | #define PULL64(x) (x) | ||
140 | #else | ||
141 | #define B(x, j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8)) | ||
142 | #define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7)) | ||
143 | #endif | ||
144 | #endif | ||
145 | |||
146 | #define ROTR(x, s) crypto_ror_u64(x, s) | 120 | #define ROTR(x, s) crypto_ror_u64(x, s) |
147 | 121 | ||
148 | #define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) | 122 | #define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) |
@@ -185,37 +159,60 @@ sha512_block_data_order(SHA512_CTX *ctx, const void *_in, size_t num) | |||
185 | g = ctx->h[6]; | 159 | g = ctx->h[6]; |
186 | h = ctx->h[7]; | 160 | h = ctx->h[7]; |
187 | 161 | ||
188 | X[0] = PULL64(in[0]); | 162 | if ((uintptr_t)in % sizeof(SHA_LONG64) == 0) { |
163 | /* Input is 64 bit aligned. */ | ||
164 | X[0] = be64toh(in[0]); | ||
165 | X[1] = be64toh(in[1]); | ||
166 | X[2] = be64toh(in[2]); | ||
167 | X[3] = be64toh(in[3]); | ||
168 | X[4] = be64toh(in[4]); | ||
169 | X[5] = be64toh(in[5]); | ||
170 | X[6] = be64toh(in[6]); | ||
171 | X[7] = be64toh(in[7]); | ||
172 | X[8] = be64toh(in[8]); | ||
173 | X[9] = be64toh(in[9]); | ||
174 | X[10] = be64toh(in[10]); | ||
175 | X[11] = be64toh(in[11]); | ||
176 | X[12] = be64toh(in[12]); | ||
177 | X[13] = be64toh(in[13]); | ||
178 | X[14] = be64toh(in[14]); | ||
179 | X[15] = be64toh(in[15]); | ||
180 | } else { | ||
181 | /* Input is not 64 bit aligned. */ | ||
182 | X[0] = crypto_load_be64toh(&in[0]); | ||
183 | X[1] = crypto_load_be64toh(&in[1]); | ||
184 | X[2] = crypto_load_be64toh(&in[2]); | ||
185 | X[3] = crypto_load_be64toh(&in[3]); | ||
186 | X[4] = crypto_load_be64toh(&in[4]); | ||
187 | X[5] = crypto_load_be64toh(&in[5]); | ||
188 | X[6] = crypto_load_be64toh(&in[6]); | ||
189 | X[7] = crypto_load_be64toh(&in[7]); | ||
190 | X[8] = crypto_load_be64toh(&in[8]); | ||
191 | X[9] = crypto_load_be64toh(&in[9]); | ||
192 | X[10] = crypto_load_be64toh(&in[10]); | ||
193 | X[11] = crypto_load_be64toh(&in[11]); | ||
194 | X[12] = crypto_load_be64toh(&in[12]); | ||
195 | X[13] = crypto_load_be64toh(&in[13]); | ||
196 | X[14] = crypto_load_be64toh(&in[14]); | ||
197 | X[15] = crypto_load_be64toh(&in[15]); | ||
198 | } | ||
199 | in += SHA_LBLOCK; | ||
200 | |||
189 | ROUND_00_15(0, a, b, c, d, e, f, g, h, X[0]); | 201 | ROUND_00_15(0, a, b, c, d, e, f, g, h, X[0]); |
190 | X[1] = PULL64(in[1]); | ||
191 | ROUND_00_15(1, h, a, b, c, d, e, f, g, X[1]); | 202 | ROUND_00_15(1, h, a, b, c, d, e, f, g, X[1]); |
192 | X[2] = PULL64(in[2]); | ||
193 | ROUND_00_15(2, g, h, a, b, c, d, e, f, X[2]); | 203 | ROUND_00_15(2, g, h, a, b, c, d, e, f, X[2]); |
194 | X[3] = PULL64(in[3]); | ||
195 | ROUND_00_15(3, f, g, h, a, b, c, d, e, X[3]); | 204 | ROUND_00_15(3, f, g, h, a, b, c, d, e, X[3]); |
196 | X[4] = PULL64(in[4]); | ||
197 | ROUND_00_15(4, e, f, g, h, a, b, c, d, X[4]); | 205 | ROUND_00_15(4, e, f, g, h, a, b, c, d, X[4]); |
198 | X[5] = PULL64(in[5]); | ||
199 | ROUND_00_15(5, d, e, f, g, h, a, b, c, X[5]); | 206 | ROUND_00_15(5, d, e, f, g, h, a, b, c, X[5]); |
200 | X[6] = PULL64(in[6]); | ||
201 | ROUND_00_15(6, c, d, e, f, g, h, a, b, X[6]); | 207 | ROUND_00_15(6, c, d, e, f, g, h, a, b, X[6]); |
202 | X[7] = PULL64(in[7]); | ||
203 | ROUND_00_15(7, b, c, d, e, f, g, h, a, X[7]); | 208 | ROUND_00_15(7, b, c, d, e, f, g, h, a, X[7]); |
204 | X[8] = PULL64(in[8]); | ||
205 | ROUND_00_15(8, a, b, c, d, e, f, g, h, X[8]); | 209 | ROUND_00_15(8, a, b, c, d, e, f, g, h, X[8]); |
206 | X[9] = PULL64(in[9]); | ||
207 | ROUND_00_15(9, h, a, b, c, d, e, f, g, X[9]); | 210 | ROUND_00_15(9, h, a, b, c, d, e, f, g, X[9]); |
208 | X[10] = PULL64(in[10]); | ||
209 | ROUND_00_15(10, g, h, a, b, c, d, e, f, X[10]); | 211 | ROUND_00_15(10, g, h, a, b, c, d, e, f, X[10]); |
210 | X[11] = PULL64(in[11]); | ||
211 | ROUND_00_15(11, f, g, h, a, b, c, d, e, X[11]); | 212 | ROUND_00_15(11, f, g, h, a, b, c, d, e, X[11]); |
212 | X[12] = PULL64(in[12]); | ||
213 | ROUND_00_15(12, e, f, g, h, a, b, c, d, X[12]); | 213 | ROUND_00_15(12, e, f, g, h, a, b, c, d, X[12]); |
214 | X[13] = PULL64(in[13]); | ||
215 | ROUND_00_15(13, d, e, f, g, h, a, b, c, X[13]); | 214 | ROUND_00_15(13, d, e, f, g, h, a, b, c, X[13]); |
216 | X[14] = PULL64(in[14]); | ||
217 | ROUND_00_15(14, c, d, e, f, g, h, a, b, X[14]); | 215 | ROUND_00_15(14, c, d, e, f, g, h, a, b, X[14]); |
218 | X[15] = PULL64(in[15]); | ||
219 | ROUND_00_15(15, b, c, d, e, f, g, h, a, X[15]); | 216 | ROUND_00_15(15, b, c, d, e, f, g, h, a, X[15]); |
220 | 217 | ||
221 | for (i = 16; i < 80; i += 16) { | 218 | for (i = 16; i < 80; i += 16) { |
@@ -245,8 +242,6 @@ sha512_block_data_order(SHA512_CTX *ctx, const void *_in, size_t num) | |||
245 | ctx->h[5] += f; | 242 | ctx->h[5] += f; |
246 | ctx->h[6] += g; | 243 | ctx->h[6] += g; |
247 | ctx->h[7] += h; | 244 | ctx->h[7] += h; |
248 | |||
249 | in += SHA_LBLOCK; | ||
250 | } | 245 | } |
251 | } | 246 | } |
252 | 247 | ||
@@ -323,21 +318,15 @@ SHA512_Init(SHA512_CTX *c) | |||
323 | void | 318 | void |
324 | SHA512_Transform(SHA512_CTX *c, const unsigned char *data) | 319 | SHA512_Transform(SHA512_CTX *c, const unsigned char *data) |
325 | { | 320 | { |
326 | #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA | ||
327 | if ((size_t)data % sizeof(c->u.d[0]) != 0) { | ||
328 | memcpy(c->u.p, data, sizeof(c->u.p)); | ||
329 | data = c->u.p; | ||
330 | } | ||
331 | #endif | ||
332 | sha512_block_data_order(c, data, 1); | 321 | sha512_block_data_order(c, data, 1); |
333 | } | 322 | } |
334 | 323 | ||
335 | int | 324 | int |
336 | SHA512_Update(SHA512_CTX *c, const void *_data, size_t len) | 325 | SHA512_Update(SHA512_CTX *c, const void *_data, size_t len) |
337 | { | 326 | { |
338 | SHA_LONG64 l; | 327 | const unsigned char *data = _data; |
339 | unsigned char *p = c->u.p; | 328 | unsigned char *p = c->u.p; |
340 | const unsigned char *data = (const unsigned char *)_data; | 329 | SHA_LONG64 l; |
341 | 330 | ||
342 | if (len == 0) | 331 | if (len == 0) |
343 | return 1; | 332 | return 1; |
@@ -366,22 +355,10 @@ SHA512_Update(SHA512_CTX *c, const void *_data, size_t len) | |||
366 | } | 355 | } |
367 | 356 | ||
368 | if (len >= sizeof(c->u)) { | 357 | if (len >= sizeof(c->u)) { |
369 | #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA | 358 | sha512_block_data_order(c, data, len/sizeof(c->u)); |
370 | if ((size_t)data % sizeof(c->u.d[0]) != 0) { | 359 | data += len; |
371 | while (len >= sizeof(c->u)) { | 360 | len %= sizeof(c->u); |
372 | memcpy(p, data, sizeof(c->u)); | 361 | data -= len; |
373 | sha512_block_data_order(c, p, 1); | ||
374 | len -= sizeof(c->u); | ||
375 | data += sizeof(c->u); | ||
376 | } | ||
377 | } else | ||
378 | #endif | ||
379 | { | ||
380 | sha512_block_data_order(c, data, len/sizeof(c->u)); | ||
381 | data += len; | ||
382 | len %= sizeof(c->u); | ||
383 | data -= len; | ||
384 | } | ||
385 | } | 362 | } |
386 | 363 | ||
387 | if (len != 0) { | 364 | if (len != 0) { |