summaryrefslogtreecommitdiff
path: root/src/lib
diff options
context:
space:
mode:
author: jsing <> 2023-05-17 06:37:14 +0000
committer: jsing <> 2023-05-17 06:37:14 +0000
commit: 6683fd06977fdaa02290993ccf540e097725fbcb (patch)
tree: 1719bad5dfd26376b240d5f9e7a9ba134c117d2e /src/lib
parent: b36a9f1697cdcaa5ea871949d17a1b748ed8b0a2 (diff)
download: openbsd-6683fd06977fdaa02290993ccf540e097725fbcb.tar.gz
openbsd-6683fd06977fdaa02290993ccf540e097725fbcb.tar.bz2
openbsd-6683fd06977fdaa02290993ccf540e097725fbcb.zip
Clean up alignment handling for SHA-512.
All assembly implementations are required to perform their own alignment handling. In the case of the C implementation, on strict alignment platforms, unaligned data will be copied into an aligned buffer. However, most platforms then perform byte-by-byte reads (via the PULL64 macros). Instead, remove SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA and move alignment handling to sha512_block_data_order() - if the data is aligned then simply perform 64 bit loads and then do endian conversion via be64toh(). If the data is unaligned then use memcpy() and be64toh() (in the form of crypto_load_be64toh()). Overall this reduces complexity and can improve performance (on aarch64 we get a ~10% performance gain with aligned input and a ~1-2% gain on armv7), while the same movq/bswapq is generated for amd64 and movl/bswapl for i386. ok tb@
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/libcrypto/crypto_internal.h 55
-rw-r--r-- src/lib/libcrypto/sha/sha512.c 121
2 files changed, 95 insertions, 81 deletions
diff --git a/src/lib/libcrypto/crypto_internal.h b/src/lib/libcrypto/crypto_internal.h
index 24a06256db..2e6ab82692 100644
--- a/src/lib/libcrypto/crypto_internal.h
+++ b/src/lib/libcrypto/crypto_internal.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: crypto_internal.h,v 1.3 2023/04/14 10:42:51 jsing Exp $ */ 1/* $OpenBSD: crypto_internal.h,v 1.4 2023/05/17 06:37:14 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -22,14 +22,34 @@
22#ifndef HEADER_CRYPTO_INTERNAL_H 22#ifndef HEADER_CRYPTO_INTERNAL_H
23#define HEADER_CRYPTO_INTERNAL_H 23#define HEADER_CRYPTO_INTERNAL_H
24 24
25#define CTASSERT(x) \
26 extern char _ctassert[(x) ? 1 : -1] __attribute__((__unused__))
27
28/*
29 * crypto_load_be32toh() loads a 32 bit unsigned big endian value as a 32 bit
30 * unsigned host endian value, from the specified address in memory. The memory
31 * address may have any alignment.
32 */
33#ifndef HAVE_CRYPTO_LOAD_BE32TOH
34static inline uint32_t
35crypto_load_be32toh(const void *src)
36{
37 uint32_t v;
38
39 memcpy(&v, src, sizeof(v));
40
41 return be32toh(v);
42}
43#endif
44
25/* 45/*
26 * crypto_store_htobe32() stores a 32 bit unsigned host endian value 46 * crypto_store_htobe32() stores a 32 bit unsigned host endian value as a 32 bit
27 * as a 32 bit unsigned big endian value, at the specified location in 47 * unsigned big endian value, at the specified address in memory. The memory
28 * memory. The memory location may have any alignment. 48 * address may have any alignment.
29 */ 49 */
30#ifndef HAVE_CRYPTO_STORE_HTOBE32 50#ifndef HAVE_CRYPTO_STORE_HTOBE32
31static inline void 51static inline void
32crypto_store_htobe32(uint8_t *dst, uint32_t v) 52crypto_store_htobe32(void *dst, uint32_t v)
33{ 53{
34 v = htobe32(v); 54 v = htobe32(v);
35 memcpy(dst, &v, sizeof(v)); 55 memcpy(dst, &v, sizeof(v));
@@ -37,13 +57,30 @@ crypto_store_htobe32(uint8_t *dst, uint32_t v)
37#endif 57#endif
38 58
39/* 59/*
40 * crypto_store_htobe64() stores a 64 bit unsigned host endian value 60 * crypto_load_be64toh() loads a 64 bit unsigned big endian value as a 64 bit
41 * as a 64 bit unsigned big endian value, at the specified location in 61 * unsigned host endian value, from the specified address in memory. The memory
42 * memory. The memory location may have any alignment. 62 * address may have any alignment.
63 */
64#ifndef HAVE_CRYPTO_LOAD_BE64TOH
65static inline uint64_t
66crypto_load_be64toh(const void *src)
67{
68 uint64_t v;
69
70 memcpy(&v, src, sizeof(v));
71
72 return be64toh(v);
73}
74#endif
75
76/*
77 * crypto_store_htobe64() stores a 64 bit unsigned host endian value as a 64 bit
78 * unsigned big endian value, at the specified address in memory. The memory
79 * address may have any alignment.
43 */ 80 */
44#ifndef HAVE_CRYPTO_STORE_HTOBE64 81#ifndef HAVE_CRYPTO_STORE_HTOBE64
45static inline void 82static inline void
46crypto_store_htobe64(uint8_t *dst, uint64_t v) 83crypto_store_htobe64(void *dst, uint64_t v)
47{ 84{
48 v = htobe64(v); 85 v = htobe64(v);
49 memcpy(dst, &v, sizeof(v)); 86 memcpy(dst, &v, sizeof(v));
diff --git a/src/lib/libcrypto/sha/sha512.c b/src/lib/libcrypto/sha/sha512.c
index c0752bd2c7..c88ef057dd 100644
--- a/src/lib/libcrypto/sha/sha512.c
+++ b/src/lib/libcrypto/sha/sha512.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha512.c,v 1.36 2023/05/16 07:04:57 jsing Exp $ */ 1/* $OpenBSD: sha512.c,v 1.37 2023/05/17 06:37:14 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -66,9 +66,8 @@
66 66
67#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512) 67#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
68 68
69#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM) 69/* Ensure that SHA_LONG64 and uint64_t are equivalent. */
70#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA 70CTASSERT(sizeof(SHA_LONG64) == sizeof(uint64_t));
71#endif
72 71
73#ifdef SHA512_ASM 72#ifdef SHA512_ASM
74void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num); 73void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
@@ -118,31 +117,6 @@ static const SHA_LONG64 K512[80] = {
118 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817), 117 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817),
119}; 118};
120 119
121#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
122# if defined(__x86_64) || defined(__x86_64__)
123# define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
124 asm ("bswapq %0" \
125 : "=r"(ret) \
126 : "0"(ret)); ret; })
127# elif (defined(__i386) || defined(__i386__))
128# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
129 unsigned int hi=p[0],lo=p[1]; \
130 asm ("bswapl %0; bswapl %1;" \
131 : "=r"(lo),"=r"(hi) \
132 : "0"(lo),"1"(hi)); \
133 ((SHA_LONG64)hi)<<32|lo; })
134# endif
135#endif
136
137#ifndef PULL64
138#if BYTE_ORDER == BIG_ENDIAN
139#define PULL64(x) (x)
140#else
141#define B(x, j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
142#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
143#endif
144#endif
145
146#define ROTR(x, s) crypto_ror_u64(x, s) 120#define ROTR(x, s) crypto_ror_u64(x, s)
147 121
148#define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) 122#define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
@@ -185,37 +159,60 @@ sha512_block_data_order(SHA512_CTX *ctx, const void *_in, size_t num)
185 g = ctx->h[6]; 159 g = ctx->h[6];
186 h = ctx->h[7]; 160 h = ctx->h[7];
187 161
188 X[0] = PULL64(in[0]); 162 if ((uintptr_t)in % sizeof(SHA_LONG64) == 0) {
163 /* Input is 64 bit aligned. */
164 X[0] = be64toh(in[0]);
165 X[1] = be64toh(in[1]);
166 X[2] = be64toh(in[2]);
167 X[3] = be64toh(in[3]);
168 X[4] = be64toh(in[4]);
169 X[5] = be64toh(in[5]);
170 X[6] = be64toh(in[6]);
171 X[7] = be64toh(in[7]);
172 X[8] = be64toh(in[8]);
173 X[9] = be64toh(in[9]);
174 X[10] = be64toh(in[10]);
175 X[11] = be64toh(in[11]);
176 X[12] = be64toh(in[12]);
177 X[13] = be64toh(in[13]);
178 X[14] = be64toh(in[14]);
179 X[15] = be64toh(in[15]);
180 } else {
181 /* Input is not 64 bit aligned. */
182 X[0] = crypto_load_be64toh(&in[0]);
183 X[1] = crypto_load_be64toh(&in[1]);
184 X[2] = crypto_load_be64toh(&in[2]);
185 X[3] = crypto_load_be64toh(&in[3]);
186 X[4] = crypto_load_be64toh(&in[4]);
187 X[5] = crypto_load_be64toh(&in[5]);
188 X[6] = crypto_load_be64toh(&in[6]);
189 X[7] = crypto_load_be64toh(&in[7]);
190 X[8] = crypto_load_be64toh(&in[8]);
191 X[9] = crypto_load_be64toh(&in[9]);
192 X[10] = crypto_load_be64toh(&in[10]);
193 X[11] = crypto_load_be64toh(&in[11]);
194 X[12] = crypto_load_be64toh(&in[12]);
195 X[13] = crypto_load_be64toh(&in[13]);
196 X[14] = crypto_load_be64toh(&in[14]);
197 X[15] = crypto_load_be64toh(&in[15]);
198 }
199 in += SHA_LBLOCK;
200
189 ROUND_00_15(0, a, b, c, d, e, f, g, h, X[0]); 201 ROUND_00_15(0, a, b, c, d, e, f, g, h, X[0]);
190 X[1] = PULL64(in[1]);
191 ROUND_00_15(1, h, a, b, c, d, e, f, g, X[1]); 202 ROUND_00_15(1, h, a, b, c, d, e, f, g, X[1]);
192 X[2] = PULL64(in[2]);
193 ROUND_00_15(2, g, h, a, b, c, d, e, f, X[2]); 203 ROUND_00_15(2, g, h, a, b, c, d, e, f, X[2]);
194 X[3] = PULL64(in[3]);
195 ROUND_00_15(3, f, g, h, a, b, c, d, e, X[3]); 204 ROUND_00_15(3, f, g, h, a, b, c, d, e, X[3]);
196 X[4] = PULL64(in[4]);
197 ROUND_00_15(4, e, f, g, h, a, b, c, d, X[4]); 205 ROUND_00_15(4, e, f, g, h, a, b, c, d, X[4]);
198 X[5] = PULL64(in[5]);
199 ROUND_00_15(5, d, e, f, g, h, a, b, c, X[5]); 206 ROUND_00_15(5, d, e, f, g, h, a, b, c, X[5]);
200 X[6] = PULL64(in[6]);
201 ROUND_00_15(6, c, d, e, f, g, h, a, b, X[6]); 207 ROUND_00_15(6, c, d, e, f, g, h, a, b, X[6]);
202 X[7] = PULL64(in[7]);
203 ROUND_00_15(7, b, c, d, e, f, g, h, a, X[7]); 208 ROUND_00_15(7, b, c, d, e, f, g, h, a, X[7]);
204 X[8] = PULL64(in[8]);
205 ROUND_00_15(8, a, b, c, d, e, f, g, h, X[8]); 209 ROUND_00_15(8, a, b, c, d, e, f, g, h, X[8]);
206 X[9] = PULL64(in[9]);
207 ROUND_00_15(9, h, a, b, c, d, e, f, g, X[9]); 210 ROUND_00_15(9, h, a, b, c, d, e, f, g, X[9]);
208 X[10] = PULL64(in[10]);
209 ROUND_00_15(10, g, h, a, b, c, d, e, f, X[10]); 211 ROUND_00_15(10, g, h, a, b, c, d, e, f, X[10]);
210 X[11] = PULL64(in[11]);
211 ROUND_00_15(11, f, g, h, a, b, c, d, e, X[11]); 212 ROUND_00_15(11, f, g, h, a, b, c, d, e, X[11]);
212 X[12] = PULL64(in[12]);
213 ROUND_00_15(12, e, f, g, h, a, b, c, d, X[12]); 213 ROUND_00_15(12, e, f, g, h, a, b, c, d, X[12]);
214 X[13] = PULL64(in[13]);
215 ROUND_00_15(13, d, e, f, g, h, a, b, c, X[13]); 214 ROUND_00_15(13, d, e, f, g, h, a, b, c, X[13]);
216 X[14] = PULL64(in[14]);
217 ROUND_00_15(14, c, d, e, f, g, h, a, b, X[14]); 215 ROUND_00_15(14, c, d, e, f, g, h, a, b, X[14]);
218 X[15] = PULL64(in[15]);
219 ROUND_00_15(15, b, c, d, e, f, g, h, a, X[15]); 216 ROUND_00_15(15, b, c, d, e, f, g, h, a, X[15]);
220 217
221 for (i = 16; i < 80; i += 16) { 218 for (i = 16; i < 80; i += 16) {
@@ -245,8 +242,6 @@ sha512_block_data_order(SHA512_CTX *ctx, const void *_in, size_t num)
245 ctx->h[5] += f; 242 ctx->h[5] += f;
246 ctx->h[6] += g; 243 ctx->h[6] += g;
247 ctx->h[7] += h; 244 ctx->h[7] += h;
248
249 in += SHA_LBLOCK;
250 } 245 }
251} 246}
252 247
@@ -323,21 +318,15 @@ SHA512_Init(SHA512_CTX *c)
323void 318void
324SHA512_Transform(SHA512_CTX *c, const unsigned char *data) 319SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
325{ 320{
326#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
327 if ((size_t)data % sizeof(c->u.d[0]) != 0) {
328 memcpy(c->u.p, data, sizeof(c->u.p));
329 data = c->u.p;
330 }
331#endif
332 sha512_block_data_order(c, data, 1); 321 sha512_block_data_order(c, data, 1);
333} 322}
334 323
335int 324int
336SHA512_Update(SHA512_CTX *c, const void *_data, size_t len) 325SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
337{ 326{
338 SHA_LONG64 l; 327 const unsigned char *data = _data;
339 unsigned char *p = c->u.p; 328 unsigned char *p = c->u.p;
340 const unsigned char *data = (const unsigned char *)_data; 329 SHA_LONG64 l;
341 330
342 if (len == 0) 331 if (len == 0)
343 return 1; 332 return 1;
@@ -366,22 +355,10 @@ SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
366 } 355 }
367 356
368 if (len >= sizeof(c->u)) { 357 if (len >= sizeof(c->u)) {
369#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA 358 sha512_block_data_order(c, data, len/sizeof(c->u));
370 if ((size_t)data % sizeof(c->u.d[0]) != 0) { 359 data += len;
371 while (len >= sizeof(c->u)) { 360 len %= sizeof(c->u);
372 memcpy(p, data, sizeof(c->u)); 361 data -= len;
373 sha512_block_data_order(c, p, 1);
374 len -= sizeof(c->u);
375 data += sizeof(c->u);
376 }
377 } else
378#endif
379 {
380 sha512_block_data_order(c, data, len/sizeof(c->u));
381 data += len;
382 len %= sizeof(c->u);
383 data -= len;
384 }
385 } 362 }
386 363
387 if (len != 0) { 364 if (len != 0) {