path: root/src/lib/libcrypto/modes
Diffstat (limited to 'src/lib/libcrypto/modes')
-rw-r--r--  src/lib/libcrypto/modes/asm/ghash-x86.pl        3
-rw-r--r--  src/lib/libcrypto/modes/ccm128.c               90
-rw-r--r--  src/lib/libcrypto/modes/ctr128.c               12
-rw-r--r--  src/lib/libcrypto/modes/gcm128.c             1065
-rw-r--r--  src/lib/libcrypto/modes/gcm128_amd64.c         44
-rw-r--r--  src/lib/libcrypto/modes/gcm128_i386.c          56
-rw-r--r--  src/lib/libcrypto/modes/modes_local.h          47
-rw-r--r--  src/lib/libcrypto/modes/xts128.c               40
8 files changed, 365 insertions, 992 deletions
diff --git a/src/lib/libcrypto/modes/asm/ghash-x86.pl b/src/lib/libcrypto/modes/asm/ghash-x86.pl
index 47833582b6..395c680cc5 100644
--- a/src/lib/libcrypto/modes/asm/ghash-x86.pl
+++ b/src/lib/libcrypto/modes/asm/ghash-x86.pl
@@ -119,8 +119,7 @@ require "x86asm.pl";
119 119
120&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); 120&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");
121 121
122$sse2=0; 122$sse2=1;
123for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
124 123
125($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx"); 124($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx");
126$inp = "edi"; 125$inp = "edi";
diff --git a/src/lib/libcrypto/modes/ccm128.c b/src/lib/libcrypto/modes/ccm128.c
index 0f592dd9e5..e27681ee62 100644
--- a/src/lib/libcrypto/modes/ccm128.c
+++ b/src/lib/libcrypto/modes/ccm128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: ccm128.c,v 1.10 2025/04/21 16:01:18 jsing Exp $ */ 1/* $OpenBSD: ccm128.c,v 1.12 2025/05/18 09:21:29 bcook Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -61,7 +61,7 @@ CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
61 unsigned int M, unsigned int L, void *key, block128_f block) 61 unsigned int M, unsigned int L, void *key, block128_f block)
62{ 62{
63 memset(ctx->nonce.c, 0, sizeof(ctx->nonce.c)); 63 memset(ctx->nonce.c, 0, sizeof(ctx->nonce.c));
64 ctx->nonce.c[0] = ((u8)(L - 1) & 7) | (u8)(((M - 2)/2) & 7) << 3; 64 ctx->nonce.c[0] = ((uint8_t)(L - 1) & 7) | (uint8_t)(((M - 2)/2) & 7) << 3;
65 ctx->blocks = 0; 65 ctx->blocks = 0;
66 ctx->block = block; 66 ctx->block = block;
67 ctx->key = key; 67 ctx->key = key;
@@ -81,17 +81,17 @@ CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx,
81 return -1; /* nonce is too short */ 81 return -1; /* nonce is too short */
82 82
83 if (sizeof(mlen) == 8 && L >= 3) { 83 if (sizeof(mlen) == 8 && L >= 3) {
84 ctx->nonce.c[8] = (u8)(mlen >> (56 % (sizeof(mlen)*8))); 84 ctx->nonce.c[8] = (uint8_t)(mlen >> (56 % (sizeof(mlen)*8)));
85 ctx->nonce.c[9] = (u8)(mlen >> (48 % (sizeof(mlen)*8))); 85 ctx->nonce.c[9] = (uint8_t)(mlen >> (48 % (sizeof(mlen)*8)));
86 ctx->nonce.c[10] = (u8)(mlen >> (40 % (sizeof(mlen)*8))); 86 ctx->nonce.c[10] = (uint8_t)(mlen >> (40 % (sizeof(mlen)*8)));
87 ctx->nonce.c[11] = (u8)(mlen >> (32 % (sizeof(mlen)*8))); 87 ctx->nonce.c[11] = (uint8_t)(mlen >> (32 % (sizeof(mlen)*8)));
88 } else 88 } else
89 ctx->nonce.u[1] = 0; 89 ctx->nonce.u[1] = 0;
90 90
91 ctx->nonce.c[12] = (u8)(mlen >> 24); 91 ctx->nonce.c[12] = (uint8_t)(mlen >> 24);
92 ctx->nonce.c[13] = (u8)(mlen >> 16); 92 ctx->nonce.c[13] = (uint8_t)(mlen >> 16);
93 ctx->nonce.c[14] = (u8)(mlen >> 8); 93 ctx->nonce.c[14] = (uint8_t)(mlen >> 8);
94 ctx->nonce.c[15] = (u8)mlen; 94 ctx->nonce.c[15] = (uint8_t)mlen;
95 95
96 ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */ 96 ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */
97 memcpy(&ctx->nonce.c[1], nonce, 14 - L); 97 memcpy(&ctx->nonce.c[1], nonce, 14 - L);
@@ -116,29 +116,29 @@ CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx,
116 ctx->blocks++; 116 ctx->blocks++;
117 117
118 if (alen < (0x10000 - 0x100)) { 118 if (alen < (0x10000 - 0x100)) {
119 ctx->cmac.c[0] ^= (u8)(alen >> 8); 119 ctx->cmac.c[0] ^= (uint8_t)(alen >> 8);
120 ctx->cmac.c[1] ^= (u8)alen; 120 ctx->cmac.c[1] ^= (uint8_t)alen;
121 i = 2; 121 i = 2;
122 } else if (sizeof(alen) == 8 && 122 } else if (sizeof(alen) == 8 &&
123 alen >= (size_t)1 << (32 % (sizeof(alen)*8))) { 123 alen >= (size_t)1 << (32 % (sizeof(alen)*8))) {
124 ctx->cmac.c[0] ^= 0xFF; 124 ctx->cmac.c[0] ^= 0xFF;
125 ctx->cmac.c[1] ^= 0xFF; 125 ctx->cmac.c[1] ^= 0xFF;
126 ctx->cmac.c[2] ^= (u8)(alen >> (56 % (sizeof(alen)*8))); 126 ctx->cmac.c[2] ^= (uint8_t)(alen >> (56 % (sizeof(alen)*8)));
127 ctx->cmac.c[3] ^= (u8)(alen >> (48 % (sizeof(alen)*8))); 127 ctx->cmac.c[3] ^= (uint8_t)(alen >> (48 % (sizeof(alen)*8)));
128 ctx->cmac.c[4] ^= (u8)(alen >> (40 % (sizeof(alen)*8))); 128 ctx->cmac.c[4] ^= (uint8_t)(alen >> (40 % (sizeof(alen)*8)));
129 ctx->cmac.c[5] ^= (u8)(alen >> (32 % (sizeof(alen)*8))); 129 ctx->cmac.c[5] ^= (uint8_t)(alen >> (32 % (sizeof(alen)*8)));
130 ctx->cmac.c[6] ^= (u8)(alen >> 24); 130 ctx->cmac.c[6] ^= (uint8_t)(alen >> 24);
131 ctx->cmac.c[7] ^= (u8)(alen >> 16); 131 ctx->cmac.c[7] ^= (uint8_t)(alen >> 16);
132 ctx->cmac.c[8] ^= (u8)(alen >> 8); 132 ctx->cmac.c[8] ^= (uint8_t)(alen >> 8);
133 ctx->cmac.c[9] ^= (u8)alen; 133 ctx->cmac.c[9] ^= (uint8_t)alen;
134 i = 10; 134 i = 10;
135 } else { 135 } else {
136 ctx->cmac.c[0] ^= 0xFF; 136 ctx->cmac.c[0] ^= 0xFF;
137 ctx->cmac.c[1] ^= 0xFE; 137 ctx->cmac.c[1] ^= 0xFE;
138 ctx->cmac.c[2] ^= (u8)(alen >> 24); 138 ctx->cmac.c[2] ^= (uint8_t)(alen >> 24);
139 ctx->cmac.c[3] ^= (u8)(alen >> 16); 139 ctx->cmac.c[3] ^= (uint8_t)(alen >> 16);
140 ctx->cmac.c[4] ^= (u8)(alen >> 8); 140 ctx->cmac.c[4] ^= (uint8_t)(alen >> 8);
141 ctx->cmac.c[5] ^= (u8)alen; 141 ctx->cmac.c[5] ^= (uint8_t)alen;
142 i = 6; 142 i = 6;
143 } 143 }
144 144
@@ -160,7 +160,7 @@ static void
160ctr64_inc(unsigned char *counter) 160ctr64_inc(unsigned char *counter)
161{ 161{
162 unsigned int n = 8; 162 unsigned int n = 8;
163 u8 c; 163 uint8_t c;
164 164
165 counter += 8; 165 counter += 8;
166 do { 166 do {
@@ -184,8 +184,8 @@ CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
184 block128_f block = ctx->block; 184 block128_f block = ctx->block;
185 void *key = ctx->key; 185 void *key = ctx->key;
186 union { 186 union {
187 u64 u[2]; 187 uint64_t u[2];
188 u8 c[16]; 188 uint8_t c[16];
189 } scratch; 189 } scratch;
190 190
191 if (!(flags0 & 0x40)) 191 if (!(flags0 & 0x40))
@@ -211,16 +211,16 @@ CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
211 while (len >= 16) { 211 while (len >= 16) {
212#ifdef __STRICT_ALIGNMENT 212#ifdef __STRICT_ALIGNMENT
213 union { 213 union {
214 u64 u[2]; 214 uint64_t u[2];
215 u8 c[16]; 215 uint8_t c[16];
216 } temp; 216 } temp;
217 217
218 memcpy(temp.c, inp, 16); 218 memcpy(temp.c, inp, 16);
219 ctx->cmac.u[0] ^= temp.u[0]; 219 ctx->cmac.u[0] ^= temp.u[0];
220 ctx->cmac.u[1] ^= temp.u[1]; 220 ctx->cmac.u[1] ^= temp.u[1];
221#else 221#else
222 ctx->cmac.u[0] ^= ((u64 *)inp)[0]; 222 ctx->cmac.u[0] ^= ((uint64_t *)inp)[0];
223 ctx->cmac.u[1] ^= ((u64 *)inp)[1]; 223 ctx->cmac.u[1] ^= ((uint64_t *)inp)[1];
224#endif 224#endif
225 (*block)(ctx->cmac.c, ctx->cmac.c, key); 225 (*block)(ctx->cmac.c, ctx->cmac.c, key);
226 (*block)(ctx->nonce.c, scratch.c, key); 226 (*block)(ctx->nonce.c, scratch.c, key);
@@ -230,8 +230,8 @@ CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
230 temp.u[1] ^= scratch.u[1]; 230 temp.u[1] ^= scratch.u[1];
231 memcpy(out, temp.c, 16); 231 memcpy(out, temp.c, 16);
232#else 232#else
233 ((u64 *)out)[0] = scratch.u[0] ^ ((u64 *)inp)[0]; 233 ((uint64_t *)out)[0] = scratch.u[0] ^ ((uint64_t *)inp)[0];
234 ((u64 *)out)[1] = scratch.u[1] ^ ((u64 *)inp)[1]; 234 ((uint64_t *)out)[1] = scratch.u[1] ^ ((uint64_t *)inp)[1];
235#endif 235#endif
236 inp += 16; 236 inp += 16;
237 out += 16; 237 out += 16;
@@ -271,8 +271,8 @@ CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
271 block128_f block = ctx->block; 271 block128_f block = ctx->block;
272 void *key = ctx->key; 272 void *key = ctx->key;
273 union { 273 union {
274 u64 u[2]; 274 uint64_t u[2];
275 u8 c[16]; 275 uint8_t c[16];
276 } scratch; 276 } scratch;
277 277
278 if (!(flags0 & 0x40)) 278 if (!(flags0 & 0x40))
@@ -293,8 +293,8 @@ CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
293 while (len >= 16) { 293 while (len >= 16) {
294#ifdef __STRICT_ALIGNMENT 294#ifdef __STRICT_ALIGNMENT
295 union { 295 union {
296 u64 u[2]; 296 uint64_t u[2];
297 u8 c[16]; 297 uint8_t c[16];
298 } temp; 298 } temp;
299#endif 299#endif
300 (*block)(ctx->nonce.c, scratch.c, key); 300 (*block)(ctx->nonce.c, scratch.c, key);
@@ -305,10 +305,10 @@ CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
305 ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]); 305 ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]);
306 memcpy(out, scratch.c, 16); 306 memcpy(out, scratch.c, 16);
307#else 307#else
308 ctx->cmac.u[0] ^= (((u64 *)out)[0] = scratch.u[0] ^ 308 ctx->cmac.u[0] ^= (((uint64_t *)out)[0] = scratch.u[0] ^
309 ((u64 *)inp)[0]); 309 ((uint64_t *)inp)[0]);
310 ctx->cmac.u[1] ^= (((u64 *)out)[1] = scratch.u[1] ^ 310 ctx->cmac.u[1] ^= (((uint64_t *)out)[1] = scratch.u[1] ^
311 ((u64 *)inp)[1]); 311 ((uint64_t *)inp)[1]);
312#endif 312#endif
313 (*block)(ctx->cmac.c, ctx->cmac.c, key); 313 (*block)(ctx->cmac.c, ctx->cmac.c, key);
314 314
@@ -363,8 +363,8 @@ CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx,
363 block128_f block = ctx->block; 363 block128_f block = ctx->block;
364 void *key = ctx->key; 364 void *key = ctx->key;
365 union { 365 union {
366 u64 u[2]; 366 uint64_t u[2];
367 u8 c[16]; 367 uint8_t c[16];
368 } scratch; 368 } scratch;
369 369
370 if (!(flags0 & 0x40)) 370 if (!(flags0 & 0x40))
@@ -430,8 +430,8 @@ CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx,
430 block128_f block = ctx->block; 430 block128_f block = ctx->block;
431 void *key = ctx->key; 431 void *key = ctx->key;
432 union { 432 union {
433 u64 u[2]; 433 uint64_t u[2];
434 u8 c[16]; 434 uint8_t c[16];
435 } scratch; 435 } scratch;
436 436
437 if (!(flags0 & 0x40)) 437 if (!(flags0 & 0x40))
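
Note: the u8/u64 to uint8_t/uint64_t conversion above leaves the CCM API unchanged. For orientation, a minimal caller could look like the following sketch; AES-128 is cast to block128_f, M = 12 tag bytes and L = 3 length bytes give a 12-byte nonce, the key/nonce/aad values are placeholders, and CRYPTO_ccm128_tag() is assumed from the existing modes API since it does not appear in this diff.

#include <openssl/aes.h>
#include <openssl/modes.h>

/* Sketch only: seal msg into out and produce a 12-byte tag. */
static int
ccm_seal_sketch(unsigned char *out, unsigned char tag[12],
    const unsigned char *msg, size_t msg_len,
    const unsigned char *aad, size_t aad_len,
    const unsigned char key[16], const unsigned char nonce[12])
{
	CCM128_CONTEXT ccm;
	AES_KEY aes;

	if (AES_set_encrypt_key(key, 128, &aes) != 0)
		return -1;

	CRYPTO_ccm128_init(&ccm, 12, 3, &aes, (block128_f)AES_encrypt);
	if (CRYPTO_ccm128_setiv(&ccm, nonce, 12, msg_len) != 0)
		return -1;
	if (aad_len > 0)
		CRYPTO_ccm128_aad(&ccm, aad, aad_len);
	if (CRYPTO_ccm128_encrypt(&ccm, msg, out, msg_len) != 0)
		return -1;
	if (CRYPTO_ccm128_tag(&ccm, tag, 12) != 12)	/* assumed helper */
		return -1;

	return 0;
}
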
diff --git a/src/lib/libcrypto/modes/ctr128.c b/src/lib/libcrypto/modes/ctr128.c
index 30563ed6e3..87d9abb355 100644
--- a/src/lib/libcrypto/modes/ctr128.c
+++ b/src/lib/libcrypto/modes/ctr128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: ctr128.c,v 1.17 2025/04/23 10:09:08 jsing Exp $ */ 1/* $OpenBSD: ctr128.c,v 1.18 2025/05/18 09:05:59 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -63,8 +63,8 @@
63static void 63static void
64ctr128_inc(unsigned char *counter) 64ctr128_inc(unsigned char *counter)
65{ 65{
66 u32 n = 16; 66 uint32_t n = 16;
67 u8 c; 67 uint8_t c;
68 68
69 do { 69 do {
70 --n; 70 --n;
@@ -175,8 +175,8 @@ LCRYPTO_ALIAS(CRYPTO_ctr128_encrypt);
175static void 175static void
176ctr96_inc(unsigned char *counter) 176ctr96_inc(unsigned char *counter)
177{ 177{
178 u32 n = 12; 178 uint32_t n = 12;
179 u8 c; 179 uint8_t c;
180 180
181 do { 181 do {
182 --n; 182 --n;
@@ -223,7 +223,7 @@ CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
223 * overflow, which is then handled by limiting the 223 * overflow, which is then handled by limiting the
224 * amount of blocks to the exact overflow point... 224 * amount of blocks to the exact overflow point...
225 */ 225 */
226 ctr32 += (u32)blocks; 226 ctr32 += (uint32_t)blocks;
227 if (ctr32 < blocks) { 227 if (ctr32 < blocks) {
228 blocks -= ctr32; 228 blocks -= ctr32;
229 ctr32 = 0; 229 ctr32 = 0;
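
Note: the ctr128_inc()/ctr96_inc() hunks above only change integer types. For reference, the operation they implement is a plain big-endian add-with-carry over the leading bytes of the counter block, as in this self-contained sketch, which mirrors the library code rather than adding to it.

#include <stdint.h>

/* Sketch: increment the first `width` bytes of a big-endian counter. */
static void
ctr_inc_sketch(uint8_t *counter, uint32_t width)
{
	uint32_t n = width;
	uint8_t c;

	do {
		--n;
		c = counter[n];
		++c;
		counter[n] = c;
		if (c > 0)
			return;		/* no carry, done */
	} while (n > 0);
}

ctr128_inc() corresponds to width 16 over the whole block; ctr96_inc() to width 12, leaving the low 32 bits to the separate 32-bit counter that CRYPTO_ctr128_encrypt_ctr32() manages around the overflow point shown above.
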
diff --git a/src/lib/libcrypto/modes/gcm128.c b/src/lib/libcrypto/modes/gcm128.c
index 21ba9eef57..b6874296e0 100644
--- a/src/lib/libcrypto/modes/gcm128.c
+++ b/src/lib/libcrypto/modes/gcm128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: gcm128.c,v 1.35 2025/04/25 12:08:53 jsing Exp $ */ 1/* $OpenBSD: gcm128.c,v 1.54 2025/06/28 12:39:10 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -55,184 +55,12 @@
55#include "crypto_internal.h" 55#include "crypto_internal.h"
56#include "modes_local.h" 56#include "modes_local.h"
57 57
58#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) 58void
59#define REDUCE1BIT(V) \ 59gcm_init_4bit(u128 Htable[16], uint64_t H[2])
60 do { \
61 if (sizeof(size_t)==8) { \
62 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
63 V.lo = (V.hi<<63)|(V.lo>>1); \
64 V.hi = (V.hi>>1 )^T; \
65 } else { \
66 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
67 V.lo = (V.hi<<63)|(V.lo>>1); \
68 V.hi = (V.hi>>1 )^((u64)T<<32); \
69 } \
70 } while(0)
71
72/*
73 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
74 * never be set to 8. 8 is effectively reserved for testing purposes.
75 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
76 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
77 * whole spectrum of possible table driven implementations. Why? In
78 * non-"Shoup's" case memory access pattern is segmented in such manner,
79 * that it's trivial to see that cache timing information can reveal
80 * fair portion of intermediate hash value. Given that ciphertext is
81 * always available to attacker, it's possible for him to attempt to
82 * deduce secret parameter H and if successful, tamper with messages
83 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
84 * not as trivial, but there is no reason to believe that it's resistant
85 * to cache-timing attack. And the thing about "8-bit" implementation is
86 * that it consumes 16 (sixteen) times more memory, 4KB per individual
87 * key + 1KB shared. Well, on pros side it should be twice as fast as
88 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
89 * was observed to run ~75% faster, closer to 100% for commercial
90 * compilers... Yet "4-bit" procedure is preferred, because it's
91 * believed to provide better security-performance balance and adequate
92 * all-round performance. "All-round" refers to things like:
93 *
94 * - shorter setup time effectively improves overall timing for
95 * handling short messages;
96 * - larger table allocation can become unbearable because of VM
97 * subsystem penalties (for example on Windows large enough free
98 * results in VM working set trimming, meaning that consequent
99 * malloc would immediately incur working set expansion);
100 * - larger table has larger cache footprint, which can affect
101 * performance of other code paths (not necessarily even from same
102 * thread in Hyper-Threading world);
103 *
104 * Value of 1 is not appropriate for performance reasons.
105 */
106#if TABLE_BITS==8
107
108static void
109gcm_init_8bit(u128 Htable[256], u64 H[2])
110{
111 int i, j;
112 u128 V;
113
114 Htable[0].hi = 0;
115 Htable[0].lo = 0;
116 V.hi = H[0];
117 V.lo = H[1];
118
119 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
120 REDUCE1BIT(V);
121 Htable[i] = V;
122 }
123
124 for (i = 2; i < 256; i <<= 1) {
125 u128 *Hi = Htable + i, H0 = *Hi;
126 for (j = 1; j < i; ++j) {
127 Hi[j].hi = H0.hi ^ Htable[j].hi;
128 Hi[j].lo = H0.lo ^ Htable[j].lo;
129 }
130 }
131}
132
133static void
134gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
135{
136 u128 Z = { 0, 0};
137 const u8 *xi = (const u8 *)Xi + 15;
138 size_t rem, n = *xi;
139 static const size_t rem_8bit[256] = {
140 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
141 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
142 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
143 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
144 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
145 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
146 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
147 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
148 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
149 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
150 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
151 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
152 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
153 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
154 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
155 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
156 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
157 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
158 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
159 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
160 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
161 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
162 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
163 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
164 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
165 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
166 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
167 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
168 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
169 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
170 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
171 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
172 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
173 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
174 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
175 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
176 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
177 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
178 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
179 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
180 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
181 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
182 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
183 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
184 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
185 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
186 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
187 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
188 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
189 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
190 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
191 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
192 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
193 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
194 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
195 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
196 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
197 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
198 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
199 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
200 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
201 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
202 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
203 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
204
205 while (1) {
206 Z.hi ^= Htable[n].hi;
207 Z.lo ^= Htable[n].lo;
208
209 if ((u8 *)Xi == xi)
210 break;
211
212 n = *(--xi);
213
214 rem = (size_t)Z.lo & 0xff;
215 Z.lo = (Z.hi << 56)|(Z.lo >> 8);
216 Z.hi = (Z.hi >> 8);
217#if SIZE_MAX == 0xffffffffffffffff
218 Z.hi ^= rem_8bit[rem];
219#else
220 Z.hi ^= (u64)rem_8bit[rem] << 32;
221#endif
222 }
223
224 Xi[0] = htobe64(Z.hi);
225 Xi[1] = htobe64(Z.lo);
226}
227#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
228
229#elif TABLE_BITS==4
230
231static void
232gcm_init_4bit(u128 Htable[16], u64 H[2])
233{ 60{
234 u128 V; 61 u128 V;
235 int i; 62 uint64_t T;
63 int i;
236 64
237 Htable[0].hi = 0; 65 Htable[0].hi = 0;
238 Htable[0].lo = 0; 66 Htable[0].lo = 0;
@@ -240,57 +68,41 @@ gcm_init_4bit(u128 Htable[16], u64 H[2])
240 V.lo = H[1]; 68 V.lo = H[1];
241 69
242 for (Htable[8] = V, i = 4; i > 0; i >>= 1) { 70 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
243 REDUCE1BIT(V); 71 T = U64(0xe100000000000000) & (0 - (V.lo & 1));
72 V.lo = (V.hi << 63) | (V.lo >> 1);
73 V.hi = (V.hi >> 1 ) ^ T;
244 Htable[i] = V; 74 Htable[i] = V;
245 } 75 }
246 76
247 for (i = 2; i < 16; i <<= 1) { 77 for (i = 2; i < 16; i <<= 1) {
248 u128 *Hi = Htable + i; 78 u128 *Hi = Htable + i;
249 int j; 79 int j;
250 for (V = *Hi, j = 1; j < i; ++j) { 80 for (V = *Hi, j = 1; j < i; j++) {
251 Hi[j].hi = V.hi ^ Htable[j].hi; 81 Hi[j].hi = V.hi ^ Htable[j].hi;
252 Hi[j].lo = V.lo ^ Htable[j].lo; 82 Hi[j].lo = V.lo ^ Htable[j].lo;
253 } 83 }
254 } 84 }
255
256#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
257 /*
258 * ARM assembler expects specific dword order in Htable.
259 */
260 {
261 int j;
262#if BYTE_ORDER == LITTLE_ENDIAN
263 for (j = 0; j < 16; ++j) {
264 V = Htable[j];
265 Htable[j].hi = V.lo;
266 Htable[j].lo = V.hi;
267 }
268#else /* BIG_ENDIAN */
269 for (j = 0; j < 16; ++j) {
270 V = Htable[j];
271 Htable[j].hi = V.lo << 32|V.lo >> 32;
272 Htable[j].lo = V.hi << 32|V.hi >> 32;
273 }
274#endif
275 }
276#endif
277} 85}
278 86
279#ifndef GHASH_ASM 87#ifdef GHASH_ASM
280static const size_t rem_4bit[16] = { 88void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
281 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), 89void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
282 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), 90 size_t len);
283 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), 91
284 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) }; 92#else
93static const uint16_t rem_4bit[16] = {
94 0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
95 0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
96};
285 97
286static void 98static void
287gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) 99gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16])
288{ 100{
289 u128 Z; 101 u128 Z;
290 int cnt = 15; 102 int cnt = 15;
291 size_t rem, nlo, nhi; 103 size_t rem, nlo, nhi;
292 104
293 nlo = ((const u8 *)Xi)[15]; 105 nlo = ((const uint8_t *)Xi)[15];
294 nhi = nlo >> 4; 106 nhi = nlo >> 4;
295 nlo &= 0xf; 107 nlo &= 0xf;
296 108
@@ -301,29 +113,21 @@ gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
301 rem = (size_t)Z.lo & 0xf; 113 rem = (size_t)Z.lo & 0xf;
302 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 114 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
303 Z.hi = (Z.hi >> 4); 115 Z.hi = (Z.hi >> 4);
304#if SIZE_MAX == 0xffffffffffffffff 116 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
305 Z.hi ^= rem_4bit[rem];
306#else
307 Z.hi ^= (u64)rem_4bit[rem] << 32;
308#endif
309 Z.hi ^= Htable[nhi].hi; 117 Z.hi ^= Htable[nhi].hi;
310 Z.lo ^= Htable[nhi].lo; 118 Z.lo ^= Htable[nhi].lo;
311 119
312 if (--cnt < 0) 120 if (--cnt < 0)
313 break; 121 break;
314 122
315 nlo = ((const u8 *)Xi)[cnt]; 123 nlo = ((const uint8_t *)Xi)[cnt];
316 nhi = nlo >> 4; 124 nhi = nlo >> 4;
317 nlo &= 0xf; 125 nlo &= 0xf;
318 126
319 rem = (size_t)Z.lo & 0xf; 127 rem = (size_t)Z.lo & 0xf;
320 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 128 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
321 Z.hi = (Z.hi >> 4); 129 Z.hi = (Z.hi >> 4);
322#if SIZE_MAX == 0xffffffffffffffff 130 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
323 Z.hi ^= rem_4bit[rem];
324#else
325 Z.hi ^= (u64)rem_4bit[rem] << 32;
326#endif
327 Z.hi ^= Htable[nlo].hi; 131 Z.hi ^= Htable[nlo].hi;
328 Z.lo ^= Htable[nlo].lo; 132 Z.lo ^= Htable[nlo].lo;
329 } 133 }
@@ -332,25 +136,17 @@ gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
332 Xi[1] = htobe64(Z.lo); 136 Xi[1] = htobe64(Z.lo);
333} 137}
334 138
335/*
336 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
337 * details... Compiler-generated code doesn't seem to give any
338 * performance improvement, at least not on x86[_64]. It's here
339 * mostly as reference and a placeholder for possible future
340 * non-trivial optimization[s]...
341 */
342static void 139static void
343gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], 140gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
344 const u8 *inp, size_t len) 141 const uint8_t *inp, size_t len)
345{ 142{
346 u128 Z; 143 u128 Z;
347 int cnt; 144 int cnt;
348 size_t rem, nlo, nhi; 145 size_t rem, nlo, nhi;
349 146
350#if 1
351 do { 147 do {
352 cnt = 15; 148 cnt = 15;
353 nlo = ((const u8 *)Xi)[15]; 149 nlo = ((const uint8_t *)Xi)[15];
354 nlo ^= inp[15]; 150 nlo ^= inp[15];
355 nhi = nlo >> 4; 151 nhi = nlo >> 4;
356 nlo &= 0xf; 152 nlo &= 0xf;
@@ -362,18 +158,14 @@ gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
362 rem = (size_t)Z.lo & 0xf; 158 rem = (size_t)Z.lo & 0xf;
363 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 159 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
364 Z.hi = (Z.hi >> 4); 160 Z.hi = (Z.hi >> 4);
365#if SIZE_MAX == 0xffffffffffffffff 161 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
366 Z.hi ^= rem_4bit[rem];
367#else
368 Z.hi ^= (u64)rem_4bit[rem] << 32;
369#endif
370 Z.hi ^= Htable[nhi].hi; 162 Z.hi ^= Htable[nhi].hi;
371 Z.lo ^= Htable[nhi].lo; 163 Z.lo ^= Htable[nhi].lo;
372 164
373 if (--cnt < 0) 165 if (--cnt < 0)
374 break; 166 break;
375 167
376 nlo = ((const u8 *)Xi)[cnt]; 168 nlo = ((const uint8_t *)Xi)[cnt];
377 nlo ^= inp[cnt]; 169 nlo ^= inp[cnt];
378 nhi = nlo >> 4; 170 nhi = nlo >> 4;
379 nlo &= 0xf; 171 nlo &= 0xf;
@@ -381,205 +173,40 @@ gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
381 rem = (size_t)Z.lo & 0xf; 173 rem = (size_t)Z.lo & 0xf;
382 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 174 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
383 Z.hi = (Z.hi >> 4); 175 Z.hi = (Z.hi >> 4);
384#if SIZE_MAX == 0xffffffffffffffff 176 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
385 Z.hi ^= rem_4bit[rem];
386#else
387 Z.hi ^= (u64)rem_4bit[rem] << 32;
388#endif
389 Z.hi ^= Htable[nlo].hi; 177 Z.hi ^= Htable[nlo].hi;
390 Z.lo ^= Htable[nlo].lo; 178 Z.lo ^= Htable[nlo].lo;
391 } 179 }
392#else
393 /*
394 * Extra 256+16 bytes per-key plus 512 bytes shared tables
395 * [should] give ~50% improvement... One could have PACK()-ed
396 * the rem_8bit even here, but the priority is to minimize
397 * cache footprint...
398 */
399 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
400 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
401 static const unsigned short rem_8bit[256] = {
402 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
403 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
404 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
405 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
406 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
407 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
408 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
409 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
410 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
411 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
412 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
413 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
414 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
415 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
416 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
417 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
418 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
419 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
420 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
421 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
422 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
423 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
424 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
425 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
426 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
427 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
428 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
429 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
430 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
431 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
432 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
433 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
434 /*
435 * This pre-processing phase slows down procedure by approximately
436 * same time as it makes each loop spin faster. In other words
437 * single block performance is approximately same as straightforward
438 * "4-bit" implementation, and then it goes only faster...
439 */
440 for (cnt = 0; cnt < 16; ++cnt) {
441 Z.hi = Htable[cnt].hi;
442 Z.lo = Htable[cnt].lo;
443 Hshr4[cnt].lo = (Z.hi << 60)|(Z.lo >> 4);
444 Hshr4[cnt].hi = (Z.hi >> 4);
445 Hshl4[cnt] = (u8)(Z.lo << 4);
446 }
447
448 do {
449 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
450 nlo = ((const u8 *)Xi)[cnt];
451 nlo ^= inp[cnt];
452 nhi = nlo >> 4;
453 nlo &= 0xf;
454
455 Z.hi ^= Htable[nlo].hi;
456 Z.lo ^= Htable[nlo].lo;
457
458 rem = (size_t)Z.lo & 0xff;
459
460 Z.lo = (Z.hi << 56)|(Z.lo >> 8);
461 Z.hi = (Z.hi >> 8);
462
463 Z.hi ^= Hshr4[nhi].hi;
464 Z.lo ^= Hshr4[nhi].lo;
465 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
466 }
467
468 nlo = ((const u8 *)Xi)[0];
469 nlo ^= inp[0];
470 nhi = nlo >> 4;
471 nlo &= 0xf;
472
473 Z.hi ^= Htable[nlo].hi;
474 Z.lo ^= Htable[nlo].lo;
475
476 rem = (size_t)Z.lo & 0xf;
477
478 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
479 Z.hi = (Z.hi >> 4);
480
481 Z.hi ^= Htable[nhi].hi;
482 Z.lo ^= Htable[nhi].lo;
483 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
484#endif
485 180
486 Xi[0] = htobe64(Z.hi); 181 Xi[0] = htobe64(Z.hi);
487 Xi[1] = htobe64(Z.lo); 182 Xi[1] = htobe64(Z.lo);
488 } while (inp += 16, len -= 16); 183 } while (inp += 16, len -= 16);
489} 184}
490#else
491void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
492void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
493 size_t len);
494#endif 185#endif
495 186
496#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) 187static inline void
497#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) 188gcm_mul(GCM128_CONTEXT *ctx, uint64_t u[2])
498/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
499 * trashing effect. In other words idea is to hash data while it's
500 * still in L1 cache after encryption pass... */
501#define GHASH_CHUNK (3*1024)
502
503#else /* TABLE_BITS */
504
505static void
506gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
507{ 189{
508 u128 V, Z = { 0, 0 }; 190 ctx->gmult(u, ctx->Htable);
509 u64 X;
510 int i, j;
511
512 V.hi = H[0]; /* H is in host byte order, no byte swapping */
513 V.lo = H[1];
514
515 for (j = 0; j < 2; j++) {
516 X = be64toh(Xi[j]);
517
518 for (i = 0; i < 64; i++) {
519 u64 M = 0 - (X >> 63);
520 Z.hi ^= V.hi & M;
521 Z.lo ^= V.lo & M;
522 X <<= 1;
523
524 REDUCE1BIT(V);
525 }
526 }
527
528 Xi[0] = htobe64(Z.hi);
529 Xi[1] = htobe64(Z.lo);
530} 191}
531#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
532
533#endif
534
535#if defined(GHASH_ASM) && \
536 (defined(__i386) || defined(__i386__) || \
537 defined(__x86_64) || defined(__x86_64__) || \
538 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
539#include "x86_arch.h"
540#endif
541
542#if TABLE_BITS==4 && defined(GHASH_ASM)
543# if (defined(__i386) || defined(__i386__) || \
544 defined(__x86_64) || defined(__x86_64__) || \
545 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
546# define GHASH_ASM_X86_OR_64
547# define GCM_FUNCREF_4BIT
548
549void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
550void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
551void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
552 size_t len);
553 192
554# if defined(__i386) || defined(__i386__) || defined(_M_IX86) 193static inline void
555# define GHASH_ASM_X86 194gcm_ghash(GCM128_CONTEXT *ctx, const uint8_t *in, size_t len)
556void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]); 195{
557void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 196 ctx->ghash(ctx->Xi.u, ctx->Htable, in, len);
558 size_t len); 197}
559 198
560void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); 199#ifdef HAVE_GCM128_INIT
561void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, 200void gcm128_init(GCM128_CONTEXT *ctx);
562 size_t len);
563# endif
564# elif defined(__arm__) || defined(__arm)
565# include "arm_arch.h"
566# if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
567# define GHASH_ASM_ARM
568# define GCM_FUNCREF_4BIT
569void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
570void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
571 size_t len);
572# endif
573# endif
574#endif
575 201
576#ifdef GCM_FUNCREF_4BIT 202#else
577# undef GCM_MUL 203static void
578# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) 204gcm128_init(GCM128_CONTEXT *ctx)
579# ifdef GHASH 205{
580# undef GHASH 206 gcm_init_4bit(ctx->Htable, ctx->H.u);
581# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) 207 ctx->gmult = gcm_gmult_4bit;
582# endif 208 ctx->ghash = gcm_ghash_4bit;
209}
583#endif 210#endif
584 211
585void 212void
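
Note: the TABLE_BITS machinery and the 1-bit fallback are removed above. As a reference for what the retained 4-bit Htable code computes, here is a self-contained sketch equivalent to the deleted gcm_gmult_1bit(): Xi = Xi * H in GF(2^128) with the GCM reduction polynomial (constant 0xe1 in the top byte), H in host byte order and Xi big endian, using be64toh()/htobe64() as the surrounding code does.

#include <stdint.h>
#include <endian.h>

typedef struct { uint64_t hi, lo; } u128_sketch;

static void
gcm_gmult_1bit_sketch(uint64_t Xi[2], const uint64_t H[2])
{
	u128_sketch V = { H[0], H[1] }, Z = { 0, 0 };
	uint64_t T, X;
	int i, j;

	for (j = 0; j < 2; j++) {
		X = be64toh(Xi[j]);

		for (i = 0; i < 64; i++) {
			/* Conditionally accumulate V when the top bit of X is set. */
			uint64_t M = 0 - (X >> 63);

			Z.hi ^= V.hi & M;
			Z.lo ^= V.lo & M;
			X <<= 1;

			/* Multiply V by x and reduce modulo the GCM polynomial. */
			T = 0xe100000000000000ULL & (0 - (V.lo & 1));
			V.lo = (V.hi << 63) | (V.lo >> 1);
			V.hi = (V.hi >> 1) ^ T;
		}
	}

	Xi[0] = htobe64(Z.hi);
	Xi[1] = htobe64(Z.lo);
}

The 4-bit variant kept in gcm_gmult_4bit() computes the same product a nibble at a time, using Htable[i] = H * i and the rem_4bit reduction constants folded in at bit 48.
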
@@ -595,60 +222,35 @@ CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
595 ctx->H.u[0] = be64toh(ctx->H.u[0]); 222 ctx->H.u[0] = be64toh(ctx->H.u[0]);
596 ctx->H.u[1] = be64toh(ctx->H.u[1]); 223 ctx->H.u[1] = be64toh(ctx->H.u[1]);
597 224
598#if TABLE_BITS==8 225 gcm128_init(ctx);
599 gcm_init_8bit(ctx->Htable, ctx->H.u);
600#elif TABLE_BITS==4
601# if defined(GHASH_ASM_X86_OR_64)
602# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
603 /* check FXSR and PCLMULQDQ bits */
604 if ((crypto_cpu_caps_ia32() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) ==
605 (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) {
606 gcm_init_clmul(ctx->Htable, ctx->H.u);
607 ctx->gmult = gcm_gmult_clmul;
608 ctx->ghash = gcm_ghash_clmul;
609 return;
610 }
611# endif
612 gcm_init_4bit(ctx->Htable, ctx->H.u);
613# if defined(GHASH_ASM_X86) /* x86 only */
614# if defined(OPENSSL_IA32_SSE2)
615 if (crypto_cpu_caps_ia32() & CPUCAP_MASK_SSE) { /* check SSE bit */
616# else
617 if (crypto_cpu_caps_ia32() & CPUCAP_MASK_MMX) { /* check MMX bit */
618# endif
619 ctx->gmult = gcm_gmult_4bit_mmx;
620 ctx->ghash = gcm_ghash_4bit_mmx;
621 } else {
622 ctx->gmult = gcm_gmult_4bit_x86;
623 ctx->ghash = gcm_ghash_4bit_x86;
624 }
625# else
626 ctx->gmult = gcm_gmult_4bit;
627 ctx->ghash = gcm_ghash_4bit;
628# endif
629# elif defined(GHASH_ASM_ARM)
630 if (OPENSSL_armcap_P & ARMV7_NEON) {
631 ctx->gmult = gcm_gmult_neon;
632 ctx->ghash = gcm_ghash_neon;
633 } else {
634 gcm_init_4bit(ctx->Htable, ctx->H.u);
635 ctx->gmult = gcm_gmult_4bit;
636 ctx->ghash = gcm_ghash_4bit;
637 }
638# else
639 gcm_init_4bit(ctx->Htable, ctx->H.u);
640# endif
641#endif
642} 226}
643LCRYPTO_ALIAS(CRYPTO_gcm128_init); 227LCRYPTO_ALIAS(CRYPTO_gcm128_init);
644 228
229GCM128_CONTEXT *
230CRYPTO_gcm128_new(void *key, block128_f block)
231{
232 GCM128_CONTEXT *ctx;
233
234 if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
235 return NULL;
236
237 CRYPTO_gcm128_init(ctx, key, block);
238
239 return ctx;
240}
241LCRYPTO_ALIAS(CRYPTO_gcm128_new);
242
243void
244CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
245{
246 freezero(ctx, sizeof(*ctx));
247}
248LCRYPTO_ALIAS(CRYPTO_gcm128_release);
249
645void 250void
646CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len) 251CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len)
647{ 252{
648 unsigned int ctr; 253 unsigned int ctr;
649#ifdef GCM_FUNCREF_4BIT
650 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
651#endif
652 254
653 ctx->Yi.u[0] = 0; 255 ctx->Yi.u[0] = 0;
654 ctx->Yi.u[1] = 0; 256 ctx->Yi.u[1] = 0;
@@ -665,573 +267,277 @@ CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len)
665 ctr = 1; 267 ctr = 1;
666 } else { 268 } else {
667 size_t i; 269 size_t i;
668 u64 len0 = len; 270 uint64_t len0 = len;
669 271
670 while (len >= 16) { 272 while (len >= 16) {
671 for (i = 0; i < 16; ++i) 273 for (i = 0; i < 16; i++)
672 ctx->Yi.c[i] ^= iv[i]; 274 ctx->Yi.c[i] ^= iv[i];
673 GCM_MUL(ctx, Yi); 275 gcm_mul(ctx, ctx->Yi.u);
674 iv += 16; 276 iv += 16;
675 len -= 16; 277 len -= 16;
676 } 278 }
677 if (len) { 279 if (len > 0) {
678 for (i = 0; i < len; ++i) 280 for (i = 0; i < len; i++)
679 ctx->Yi.c[i] ^= iv[i]; 281 ctx->Yi.c[i] ^= iv[i];
680 GCM_MUL(ctx, Yi); 282 gcm_mul(ctx, ctx->Yi.u);
681 } 283 }
682 len0 <<= 3; 284 len0 <<= 3;
683 ctx->Yi.u[1] ^= htobe64(len0); 285 ctx->Yi.u[1] ^= htobe64(len0);
684 286
685 GCM_MUL(ctx, Yi); 287 gcm_mul(ctx, ctx->Yi.u);
686 288
687 ctr = be32toh(ctx->Yi.d[3]); 289 ctr = be32toh(ctx->Yi.d[3]);
688 } 290 }
689 291
690 (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key); 292 (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
691 ++ctr; 293 ctx->Yi.d[3] = htobe32(++ctr);
692 ctx->Yi.d[3] = htobe32(ctr);
693} 294}
694LCRYPTO_ALIAS(CRYPTO_gcm128_setiv); 295LCRYPTO_ALIAS(CRYPTO_gcm128_setiv);
695 296
696int 297int
697CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, size_t len) 298CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, size_t len)
698{ 299{
699 size_t i;
700 unsigned int n; 300 unsigned int n;
701 u64 alen = ctx->len.u[0]; 301 uint64_t alen;
702#ifdef GCM_FUNCREF_4BIT 302 size_t i;
703 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
704# ifdef GHASH
705 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
706 const u8 *inp, size_t len) = ctx->ghash;
707# endif
708#endif
709 303
710 if (ctx->len.u[1]) 304 if (ctx->len.u[1] != 0)
711 return -2; 305 return -2;
712 306
713 alen += len; 307 alen = ctx->len.u[0] + len;
714 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len)) 308 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
715 return -1; 309 return -1;
716 ctx->len.u[0] = alen; 310 ctx->len.u[0] = alen;
717 311
718 n = ctx->ares; 312 if ((n = ctx->ares) > 0) {
719 if (n) { 313 while (n > 0 && len > 0) {
720 while (n && len) {
721 ctx->Xi.c[n] ^= *(aad++); 314 ctx->Xi.c[n] ^= *(aad++);
722 --len;
723 n = (n + 1) % 16; 315 n = (n + 1) % 16;
316 len--;
724 } 317 }
725 if (n == 0) 318 if (n > 0) {
726 GCM_MUL(ctx, Xi);
727 else {
728 ctx->ares = n; 319 ctx->ares = n;
729 return 0; 320 return 0;
730 } 321 }
322 gcm_mul(ctx, ctx->Xi.u);
731 } 323 }
732 324
733#ifdef GHASH 325 if ((i = (len & (size_t)-16)) > 0) {
734 if ((i = (len & (size_t)-16))) { 326 gcm_ghash(ctx, aad, i);
735 GHASH(ctx, aad, i);
736 aad += i; 327 aad += i;
737 len -= i; 328 len -= i;
738 } 329 }
739#else 330 if (len > 0) {
740 while (len >= 16) {
741 for (i = 0; i < 16; ++i)
742 ctx->Xi.c[i] ^= aad[i];
743 GCM_MUL(ctx, Xi);
744 aad += 16;
745 len -= 16;
746 }
747#endif
748 if (len) {
749 n = (unsigned int)len; 331 n = (unsigned int)len;
750 for (i = 0; i < len; ++i) 332 for (i = 0; i < len; i++)
751 ctx->Xi.c[i] ^= aad[i]; 333 ctx->Xi.c[i] ^= aad[i];
752 } 334 }
753
754 ctx->ares = n; 335 ctx->ares = n;
336
755 return 0; 337 return 0;
756} 338}
757LCRYPTO_ALIAS(CRYPTO_gcm128_aad); 339LCRYPTO_ALIAS(CRYPTO_gcm128_aad);
758 340
759int 341int
760CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, 342CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
761 const unsigned char *in, unsigned char *out, 343 unsigned char *out, size_t len)
762 size_t len)
763{ 344{
764 unsigned int n, ctr; 345 unsigned int n, ctr;
346 uint64_t mlen;
765 size_t i; 347 size_t i;
766 u64 mlen = ctx->len.u[1];
767 block128_f block = ctx->block;
768 void *key = ctx->key;
769#ifdef GCM_FUNCREF_4BIT
770 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
771# ifdef GHASH
772 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
773 const u8 *inp, size_t len) = ctx->ghash;
774# endif
775#endif
776 348
777 mlen += len; 349 mlen = ctx->len.u[1] + len;
778 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 350 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
779 return -1; 351 return -1;
780 ctx->len.u[1] = mlen; 352 ctx->len.u[1] = mlen;
781 353
782 if (ctx->ares) { 354 if (ctx->ares > 0) {
783 /* First call to encrypt finalizes GHASH(AAD) */ 355 /* First call to encrypt finalizes GHASH(AAD) */
784 GCM_MUL(ctx, Xi); 356 gcm_mul(ctx, ctx->Xi.u);
785 ctx->ares = 0; 357 ctx->ares = 0;
786 } 358 }
787 359
788 ctr = be32toh(ctx->Yi.d[3]); 360 ctr = be32toh(ctx->Yi.d[3]);
789 361
790 n = ctx->mres; 362 n = ctx->mres;
791 if (16 % sizeof(size_t) == 0)
792 do { /* always true actually */
793 if (n) {
794 while (n && len) {
795 ctx->Xi.c[n] ^= *(out++) = *(in++) ^
796 ctx->EKi.c[n];
797 --len;
798 n = (n + 1) % 16;
799 }
800 if (n == 0)
801 GCM_MUL(ctx, Xi);
802 else {
803 ctx->mres = n;
804 return 0;
805 }
806 }
807#ifdef __STRICT_ALIGNMENT
808 if (((size_t)in|(size_t)out) % sizeof(size_t) != 0)
809 break;
810#endif
811#if defined(GHASH) && defined(GHASH_CHUNK)
812 while (len >= GHASH_CHUNK) {
813 size_t j = GHASH_CHUNK;
814
815 while (j) {
816 size_t *out_t = (size_t *)out;
817 const size_t *in_t = (const size_t *)in;
818
819 (*block)(ctx->Yi.c, ctx->EKi.c, key);
820 ++ctr;
821 ctx->Yi.d[3] = htobe32(ctr);
822
823 for (i = 0; i < 16/sizeof(size_t); ++i)
824 out_t[i] = in_t[i] ^
825 ctx->EKi.t[i];
826 out += 16;
827 in += 16;
828 j -= 16;
829 }
830 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
831 len -= GHASH_CHUNK;
832 }
833 if ((i = (len & (size_t)-16))) {
834 size_t j = i;
835
836 while (len >= 16) {
837 size_t *out_t = (size_t *)out;
838 const size_t *in_t = (const size_t *)in;
839
840 (*block)(ctx->Yi.c, ctx->EKi.c, key);
841 ++ctr;
842 ctx->Yi.d[3] = htobe32(ctr);
843
844 for (i = 0; i < 16/sizeof(size_t); ++i)
845 out_t[i] = in_t[i] ^
846 ctx->EKi.t[i];
847 out += 16;
848 in += 16;
849 len -= 16;
850 }
851 GHASH(ctx, out - j, j);
852 }
853#else
854 while (len >= 16) {
855 size_t *out_t = (size_t *)out;
856 const size_t *in_t = (const size_t *)in;
857
858 (*block)(ctx->Yi.c, ctx->EKi.c, key);
859 ++ctr;
860 ctx->Yi.d[3] = htobe32(ctr);
861
862 for (i = 0; i < 16/sizeof(size_t); ++i)
863 ctx->Xi.t[i] ^=
864 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
865 GCM_MUL(ctx, Xi);
866 out += 16;
867 in += 16;
868 len -= 16;
869 }
870#endif
871 if (len) {
872 (*block)(ctx->Yi.c, ctx->EKi.c, key);
873 ++ctr;
874 ctx->Yi.d[3] = htobe32(ctr);
875
876 while (len--) {
877 ctx->Xi.c[n] ^= out[n] = in[n] ^
878 ctx->EKi.c[n];
879 ++n;
880 }
881 }
882 363
883 ctx->mres = n; 364 for (i = 0; i < len; i++) {
884 return 0;
885 } while (0);
886 for (i = 0; i < len; ++i) {
887 if (n == 0) { 365 if (n == 0) {
888 (*block)(ctx->Yi.c, ctx->EKi.c, key); 366 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
889 ++ctr; 367 ctx->Yi.d[3] = htobe32(++ctr);
890 ctx->Yi.d[3] = htobe32(ctr);
891 } 368 }
892 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; 369 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
893 n = (n + 1) % 16; 370 n = (n + 1) % 16;
894 if (n == 0) 371 if (n == 0)
895 GCM_MUL(ctx, Xi); 372 gcm_mul(ctx, ctx->Xi.u);
896 } 373 }
897 374
898 ctx->mres = n; 375 ctx->mres = n;
376
899 return 0; 377 return 0;
900} 378}
901LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt); 379LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt);
902 380
903int 381int
904CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, 382CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
905 const unsigned char *in, unsigned char *out, 383 unsigned char *out, size_t len)
906 size_t len)
907{ 384{
908 unsigned int n, ctr; 385 unsigned int n, ctr;
386 uint64_t mlen;
387 uint8_t c;
909 size_t i; 388 size_t i;
910 u64 mlen = ctx->len.u[1];
911 block128_f block = ctx->block;
912 void *key = ctx->key;
913#ifdef GCM_FUNCREF_4BIT
914 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
915# ifdef GHASH
916 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
917 const u8 *inp, size_t len) = ctx->ghash;
918# endif
919#endif
920 389
921 mlen += len; 390 mlen = ctx->len.u[1] + len;
922 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 391 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
923 return -1; 392 return -1;
924 ctx->len.u[1] = mlen; 393 ctx->len.u[1] = mlen;
925 394
926 if (ctx->ares) { 395 if (ctx->ares) {
927 /* First call to decrypt finalizes GHASH(AAD) */ 396 /* First call to decrypt finalizes GHASH(AAD) */
928 GCM_MUL(ctx, Xi); 397 gcm_mul(ctx, ctx->Xi.u);
929 ctx->ares = 0; 398 ctx->ares = 0;
930 } 399 }
931 400
932 ctr = be32toh(ctx->Yi.d[3]); 401 ctr = be32toh(ctx->Yi.d[3]);
933 402
934 n = ctx->mres; 403 n = ctx->mres;
935 if (16 % sizeof(size_t) == 0)
936 do { /* always true actually */
937 if (n) {
938 while (n && len) {
939 u8 c = *(in++);
940 *(out++) = c ^ ctx->EKi.c[n];
941 ctx->Xi.c[n] ^= c;
942 --len;
943 n = (n + 1) % 16;
944 }
945 if (n == 0)
946 GCM_MUL(ctx, Xi);
947 else {
948 ctx->mres = n;
949 return 0;
950 }
951 }
952#ifdef __STRICT_ALIGNMENT
953 if (((size_t)in|(size_t)out) % sizeof(size_t) != 0)
954 break;
955#endif
956#if defined(GHASH) && defined(GHASH_CHUNK)
957 while (len >= GHASH_CHUNK) {
958 size_t j = GHASH_CHUNK;
959
960 GHASH(ctx, in, GHASH_CHUNK);
961 while (j) {
962 size_t *out_t = (size_t *)out;
963 const size_t *in_t = (const size_t *)in;
964
965 (*block)(ctx->Yi.c, ctx->EKi.c, key);
966 ++ctr;
967 ctx->Yi.d[3] = htobe32(ctr);
968
969 for (i = 0; i < 16/sizeof(size_t); ++i)
970 out_t[i] = in_t[i] ^
971 ctx->EKi.t[i];
972 out += 16;
973 in += 16;
974 j -= 16;
975 }
976 len -= GHASH_CHUNK;
977 }
978 if ((i = (len & (size_t)-16))) {
979 GHASH(ctx, in, i);
980 while (len >= 16) {
981 size_t *out_t = (size_t *)out;
982 const size_t *in_t = (const size_t *)in;
983
984 (*block)(ctx->Yi.c, ctx->EKi.c, key);
985 ++ctr;
986 ctx->Yi.d[3] = htobe32(ctr);
987
988 for (i = 0; i < 16/sizeof(size_t); ++i)
989 out_t[i] = in_t[i] ^
990 ctx->EKi.t[i];
991 out += 16;
992 in += 16;
993 len -= 16;
994 }
995 }
996#else
997 while (len >= 16) {
998 size_t *out_t = (size_t *)out;
999 const size_t *in_t = (const size_t *)in;
1000
1001 (*block)(ctx->Yi.c, ctx->EKi.c, key);
1002 ++ctr;
1003 ctx->Yi.d[3] = htobe32(ctr);
1004
1005 for (i = 0; i < 16/sizeof(size_t); ++i) {
1006 size_t c = in_t[i];
1007 out_t[i] = c ^ ctx->EKi.t[i];
1008 ctx->Xi.t[i] ^= c;
1009 }
1010 GCM_MUL(ctx, Xi);
1011 out += 16;
1012 in += 16;
1013 len -= 16;
1014 }
1015#endif
1016 if (len) {
1017 (*block)(ctx->Yi.c, ctx->EKi.c, key);
1018 ++ctr;
1019 ctx->Yi.d[3] = htobe32(ctr);
1020
1021 while (len--) {
1022 u8 c = in[n];
1023 ctx->Xi.c[n] ^= c;
1024 out[n] = c ^ ctx->EKi.c[n];
1025 ++n;
1026 }
1027 }
1028 404
1029 ctx->mres = n; 405 for (i = 0; i < len; i++) {
1030 return 0;
1031 } while (0);
1032 for (i = 0; i < len; ++i) {
1033 u8 c;
1034 if (n == 0) { 406 if (n == 0) {
1035 (*block)(ctx->Yi.c, ctx->EKi.c, key); 407 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
1036 ++ctr; 408 ctx->Yi.d[3] = htobe32(++ctr);
1037 ctx->Yi.d[3] = htobe32(ctr);
1038 } 409 }
1039 c = in[i]; 410 c = in[i];
1040 out[i] = c ^ ctx->EKi.c[n]; 411 out[i] = c ^ ctx->EKi.c[n];
1041 ctx->Xi.c[n] ^= c; 412 ctx->Xi.c[n] ^= c;
1042 n = (n + 1) % 16; 413 n = (n + 1) % 16;
1043 if (n == 0) 414 if (n == 0)
1044 GCM_MUL(ctx, Xi); 415 gcm_mul(ctx, ctx->Xi.u);
1045 } 416 }
1046 417
1047 ctx->mres = n; 418 ctx->mres = n;
419
1048 return 0; 420 return 0;
1049} 421}
1050LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt); 422LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt);
1051 423
1052int 424int
1053CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, 425CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const unsigned char *in,
1054 const unsigned char *in, unsigned char *out, 426 unsigned char *out, size_t len, ctr128_f stream)
1055 size_t len, ctr128_f stream)
1056{ 427{
1057 unsigned int n, ctr; 428 unsigned int n, ctr;
1058 size_t i; 429 uint64_t mlen;
1059 u64 mlen = ctx->len.u[1]; 430 size_t i, j;
1060 void *key = ctx->key;
1061#ifdef GCM_FUNCREF_4BIT
1062 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1063# ifdef GHASH
1064 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1065 const u8 *inp, size_t len) = ctx->ghash;
1066# endif
1067#endif
1068 431
1069 mlen += len; 432 mlen = ctx->len.u[1] + len;
1070 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 433 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1071 return -1; 434 return -1;
1072 ctx->len.u[1] = mlen; 435 ctx->len.u[1] = mlen;
1073 436
1074 if (ctx->ares) { 437 if (ctx->ares > 0) {
1075 /* First call to encrypt finalizes GHASH(AAD) */ 438 /* First call to encrypt finalizes GHASH(AAD) */
1076 GCM_MUL(ctx, Xi); 439 gcm_mul(ctx, ctx->Xi.u);
1077 ctx->ares = 0; 440 ctx->ares = 0;
1078 } 441 }
1079 442
1080 ctr = be32toh(ctx->Yi.d[3]); 443 ctr = be32toh(ctx->Yi.d[3]);
1081 444
1082 n = ctx->mres; 445 if ((n = ctx->mres) > 0) {
1083 if (n) { 446 while (n > 0 && len > 0) {
1084 while (n && len) {
1085 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; 447 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1086 --len;
1087 n = (n + 1) % 16; 448 n = (n + 1) % 16;
449 len--;
1088 } 450 }
1089 if (n == 0) 451 if (n > 0) {
1090 GCM_MUL(ctx, Xi);
1091 else {
1092 ctx->mres = n; 452 ctx->mres = n;
1093 return 0; 453 return 0;
1094 } 454 }
455 gcm_mul(ctx, ctx->Xi.u);
1095 } 456 }
1096#if defined(GHASH) && defined(GHASH_CHUNK) 457 if ((i = (len & (size_t)-16)) > 0) {
1097 while (len >= GHASH_CHUNK) { 458 j = i / 16;
1098 (*stream)(in, out, GHASH_CHUNK/16, key, ctx->Yi.c); 459 stream(in, out, j, ctx->key, ctx->Yi.c);
1099 ctr += GHASH_CHUNK/16;
1100 ctx->Yi.d[3] = htobe32(ctr);
1101 GHASH(ctx, out, GHASH_CHUNK);
1102 out += GHASH_CHUNK;
1103 in += GHASH_CHUNK;
1104 len -= GHASH_CHUNK;
1105 }
1106#endif
1107 if ((i = (len & (size_t)-16))) {
1108 size_t j = i/16;
1109
1110 (*stream)(in, out, j, key, ctx->Yi.c);
1111 ctr += (unsigned int)j; 460 ctr += (unsigned int)j;
1112 ctx->Yi.d[3] = htobe32(ctr); 461 ctx->Yi.d[3] = htobe32(ctr);
462 gcm_ghash(ctx, out, i);
1113 in += i; 463 in += i;
1114 len -= i;
1115#if defined(GHASH)
1116 GHASH(ctx, out, i);
1117 out += i; 464 out += i;
1118#else 465 len -= i;
1119 while (j--) {
1120 for (i = 0; i < 16; ++i)
1121 ctx->Xi.c[i] ^= out[i];
1122 GCM_MUL(ctx, Xi);
1123 out += 16;
1124 }
1125#endif
1126 } 466 }
1127 if (len) { 467 if (len > 0) {
1128 (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key); 468 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
1129 ++ctr; 469 ctx->Yi.d[3] = htobe32(++ctr);
1130 ctx->Yi.d[3] = htobe32(ctr); 470 while (len-- > 0) {
1131 while (len--) {
1132 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; 471 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1133 ++n; 472 n++;
1134 } 473 }
1135 } 474 }
1136 475
1137 ctx->mres = n; 476 ctx->mres = n;
477
1138 return 0; 478 return 0;
1139} 479}
1140LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt_ctr32); 480LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt_ctr32);
1141 481
1142int 482int
1143CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, 483CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const unsigned char *in,
1144 const unsigned char *in, unsigned char *out, 484 unsigned char *out, size_t len, ctr128_f stream)
1145 size_t len, ctr128_f stream)
1146{ 485{
1147 unsigned int n, ctr; 486 unsigned int n, ctr;
1148 size_t i; 487 uint64_t mlen;
1149 u64 mlen = ctx->len.u[1]; 488 size_t i, j;
1150 void *key = ctx->key; 489 uint8_t c;
1151#ifdef GCM_FUNCREF_4BIT
1152 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1153# ifdef GHASH
1154 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1155 const u8 *inp, size_t len) = ctx->ghash;
1156# endif
1157#endif
1158 490
1159 mlen += len; 491 mlen = ctx->len.u[1] + len;
1160 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 492 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1161 return -1; 493 return -1;
1162 ctx->len.u[1] = mlen; 494 ctx->len.u[1] = mlen;
1163 495
1164 if (ctx->ares) { 496 if (ctx->ares > 0) {
1165 /* First call to decrypt finalizes GHASH(AAD) */ 497 /* First call to decrypt finalizes GHASH(AAD) */
1166 GCM_MUL(ctx, Xi); 498 gcm_mul(ctx, ctx->Xi.u);
1167 ctx->ares = 0; 499 ctx->ares = 0;
1168 } 500 }
1169 501
1170 ctr = be32toh(ctx->Yi.d[3]); 502 ctr = be32toh(ctx->Yi.d[3]);
1171 503
1172 n = ctx->mres; 504 if ((n = ctx->mres) > 0) {
1173 if (n) { 505 while (n > 0 && len > 0) {
1174 while (n && len) { 506 c = *(in++);
1175 u8 c = *(in++);
1176 *(out++) = c ^ ctx->EKi.c[n]; 507 *(out++) = c ^ ctx->EKi.c[n];
1177 ctx->Xi.c[n] ^= c; 508 ctx->Xi.c[n] ^= c;
1178 --len;
1179 n = (n + 1) % 16; 509 n = (n + 1) % 16;
510 len--;
1180 } 511 }
1181 if (n == 0) 512 if (n > 0) {
1182 GCM_MUL(ctx, Xi);
1183 else {
1184 ctx->mres = n; 513 ctx->mres = n;
1185 return 0; 514 return 0;
1186 } 515 }
516 gcm_mul(ctx, ctx->Xi.u);
1187 } 517 }
1188#if defined(GHASH) && defined(GHASH_CHUNK) 518 if ((i = (len & (size_t)-16)) > 0) {
1189 while (len >= GHASH_CHUNK) { 519 j = i / 16;
1190 GHASH(ctx, in, GHASH_CHUNK); 520 gcm_ghash(ctx, in, i);
1191 (*stream)(in, out, GHASH_CHUNK/16, key, ctx->Yi.c); 521 stream(in, out, j, ctx->key, ctx->Yi.c);
1192 ctr += GHASH_CHUNK/16;
1193 ctx->Yi.d[3] = htobe32(ctr);
1194 out += GHASH_CHUNK;
1195 in += GHASH_CHUNK;
1196 len -= GHASH_CHUNK;
1197 }
1198#endif
1199 if ((i = (len & (size_t)-16))) {
1200 size_t j = i/16;
1201
1202#if defined(GHASH)
1203 GHASH(ctx, in, i);
1204#else
1205 while (j--) {
1206 size_t k;
1207 for (k = 0; k < 16; ++k)
1208 ctx->Xi.c[k] ^= in[k];
1209 GCM_MUL(ctx, Xi);
1210 in += 16;
1211 }
1212 j = i/16;
1213 in -= i;
1214#endif
1215 (*stream)(in, out, j, key, ctx->Yi.c);
1216 ctr += (unsigned int)j; 522 ctr += (unsigned int)j;
1217 ctx->Yi.d[3] = htobe32(ctr); 523 ctx->Yi.d[3] = htobe32(ctr);
1218 out += i;
1219 in += i; 524 in += i;
525 out += i;
1220 len -= i; 526 len -= i;
1221 } 527 }
1222 if (len) { 528 if (len > 0) {
1223 (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key); 529 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
1224 ++ctr; 530 ctx->Yi.d[3] = htobe32(++ctr);
1225 ctx->Yi.d[3] = htobe32(ctr); 531 while (len-- > 0) {
1226 while (len--) { 532 c = in[n];
1227 u8 c = in[n];
1228 ctx->Xi.c[n] ^= c; 533 ctx->Xi.c[n] ^= c;
1229 out[n] = c ^ ctx->EKi.c[n]; 534 out[n] = c ^ ctx->EKi.c[n];
1230 ++n; 535 n++;
1231 } 536 }
1232 } 537 }
1233 538
1234 ctx->mres = n; 539 ctx->mres = n;
540
1235 return 0; 541 return 0;
1236} 542}
1237LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt_ctr32); 543LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt_ctr32);
@@ -1240,26 +546,25 @@ int
1240CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, 546CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1241 size_t len) 547 size_t len)
1242{ 548{
1243 u64 alen = ctx->len.u[0] << 3; 549 uint64_t alen, clen;
1244 u64 clen = ctx->len.u[1] << 3;
1245#ifdef GCM_FUNCREF_4BIT
1246 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1247#endif
1248 550
1249 if (ctx->mres || ctx->ares) 551 alen = ctx->len.u[0] << 3;
1250 GCM_MUL(ctx, Xi); 552 clen = ctx->len.u[1] << 3;
553
554 if (ctx->ares > 0 || ctx->mres > 0)
555 gcm_mul(ctx, ctx->Xi.u);
1251 556
1252 ctx->Xi.u[0] ^= htobe64(alen); 557 ctx->Xi.u[0] ^= htobe64(alen);
1253 ctx->Xi.u[1] ^= htobe64(clen); 558 ctx->Xi.u[1] ^= htobe64(clen);
1254 GCM_MUL(ctx, Xi); 559 gcm_mul(ctx, ctx->Xi.u);
1255 560
1256 ctx->Xi.u[0] ^= ctx->EK0.u[0]; 561 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1257 ctx->Xi.u[1] ^= ctx->EK0.u[1]; 562 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1258 563
1259 if (tag && len <= sizeof(ctx->Xi)) 564 if (tag == NULL || len > sizeof(ctx->Xi))
1260 return memcmp(ctx->Xi.c, tag, len);
1261 else
1262 return -1; 565 return -1;
566
567 return timingsafe_memcmp(ctx->Xi.c, tag, len);
1263} 568}
1264LCRYPTO_ALIAS(CRYPTO_gcm128_finish); 569LCRYPTO_ALIAS(CRYPTO_gcm128_finish);
1265 570
@@ -1267,26 +572,10 @@ void
1267CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) 572CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1268{ 573{
1269 CRYPTO_gcm128_finish(ctx, NULL, 0); 574 CRYPTO_gcm128_finish(ctx, NULL, 0);
1270 memcpy(tag, ctx->Xi.c,
1271 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1272}
1273LCRYPTO_ALIAS(CRYPTO_gcm128_tag);
1274 575
1275GCM128_CONTEXT * 576 if (len > sizeof(ctx->Xi.c))
1276CRYPTO_gcm128_new(void *key, block128_f block) 577 len = sizeof(ctx->Xi.c);
1277{
1278 GCM128_CONTEXT *ret;
1279
1280 if ((ret = malloc(sizeof(GCM128_CONTEXT))))
1281 CRYPTO_gcm128_init(ret, key, block);
1282
1283 return ret;
1284}
1285LCRYPTO_ALIAS(CRYPTO_gcm128_new);
1286 578
1287void 579 memcpy(tag, ctx->Xi.c, len);
1288CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1289{
1290 freezero(ctx, sizeof(*ctx));
1291} 580}
1292LCRYPTO_ALIAS(CRYPTO_gcm128_release); 581LCRYPTO_ALIAS(CRYPTO_gcm128_tag);
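
CRYPTO_gcm128_tag() now clamps the requested length to the size of the accumulator before copying, rather than choosing the copy length inside the memcpy() call. A caller-side sketch requesting a truncated 96-bit tag, assuming the public prototypes from <openssl/modes.h> are in scope (the wrapper name is hypothetical):

#include <openssl/modes.h>

/*
 * Hypothetical wrapper: fetch a truncated 96-bit tag once all data has
 * been processed.  Lengths larger than 16 are clamped to
 * sizeof(ctx->Xi.c) inside CRYPTO_gcm128_tag().
 */
static void
gcm_tag96(GCM128_CONTEXT *ctx, unsigned char tag[12])
{
	CRYPTO_gcm128_tag(ctx, tag, 12);
}
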
diff --git a/src/lib/libcrypto/modes/gcm128_amd64.c b/src/lib/libcrypto/modes/gcm128_amd64.c
new file mode 100644
index 0000000000..eaa66fb32f
--- /dev/null
+++ b/src/lib/libcrypto/modes/gcm128_amd64.c
@@ -0,0 +1,44 @@
1/* $OpenBSD: gcm128_amd64.c,v 1.1 2025/06/28 12:39:10 jsing Exp $ */
2/*
3 * Copyright (c) 2025 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include "crypto_arch.h"
19#include "modes_local.h"
20
21void gcm_init_4bit(u128 Htable[16], uint64_t H[2]);
22void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
23void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
24 size_t len);
25
26void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
27void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
28void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
29 size_t len);
30
31void
32gcm128_init(GCM128_CONTEXT *ctx)
33{
34 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_CLMUL) != 0) {
35 gcm_init_clmul(ctx->Htable, ctx->H.u);
36 ctx->gmult = gcm_gmult_clmul;
37 ctx->ghash = gcm_ghash_clmul;
38 return;
39 }
40
41 gcm_init_4bit(ctx->Htable, ctx->H.u);
42 ctx->gmult = gcm_gmult_4bit;
43 ctx->ghash = gcm_ghash_4bit;
44}
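
gcm128_init() above installs the gmult/ghash implementations once, preferring the carry-less multiply (CLMUL) routines when the CPU advertises them and falling back to the 4-bit table code otherwise, so later GHASH work can go through the stored function pointers without re-testing CPU features per block. A generic sketch of that dispatch pattern (names here are illustrative, not the library's):

#include <stddef.h>
#include <stdint.h>

/*
 * Illustrative dispatch pattern: an implementation is chosen once at
 * init time; every update is then a plain indirect call.
 */
typedef void (*ghash_fn)(uint64_t Xi[2], const void *Htable,
    const uint8_t *inp, size_t len);

struct ghash_ops {
	ghash_fn ghash;		/* chosen at init, e.g. CLMUL vs. 4-bit table */
};

static void
ghash_update(const struct ghash_ops *ops, uint64_t Xi[2],
    const void *Htable, const uint8_t *inp, size_t len)
{
	ops->ghash(Xi, Htable, inp, len);
}
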
diff --git a/src/lib/libcrypto/modes/gcm128_i386.c b/src/lib/libcrypto/modes/gcm128_i386.c
new file mode 100644
index 0000000000..14b0b9ce64
--- /dev/null
+++ b/src/lib/libcrypto/modes/gcm128_i386.c
@@ -0,0 +1,56 @@
1/* $OpenBSD: gcm128_i386.c,v 1.2 2025/12/31 10:16:24 jsing Exp $ */
2/*
3 * Copyright (c) 2025 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include "crypto_arch.h"
19#include "modes_local.h"
20
21void gcm_init_4bit(u128 Htable[16], uint64_t H[2]);
22
23void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
24void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
25 size_t len);
26
27void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
28void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
29 size_t len);
30
31void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
32void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
33void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
34 size_t len);
35
36void
37gcm128_init(GCM128_CONTEXT *ctx)
38{
39 if ((crypto_cpu_caps_i386 & CRYPTO_CPU_CAPS_I386_CLMUL) != 0) {
40 gcm_init_clmul(ctx->Htable, ctx->H.u);
41 ctx->gmult = gcm_gmult_clmul;
42 ctx->ghash = gcm_ghash_clmul;
43 return;
44 }
45
46 if ((crypto_cpu_caps_i386 & CRYPTO_CPU_CAPS_I386_SSE) != 0) {
47 gcm_init_4bit(ctx->Htable, ctx->H.u);
48 ctx->gmult = gcm_gmult_4bit_mmx;
49 ctx->ghash = gcm_ghash_4bit_mmx;
50 return;
51 }
52
53 gcm_init_4bit(ctx->Htable, ctx->H.u);
54 ctx->gmult = gcm_gmult_4bit_x86;
55 ctx->ghash = gcm_ghash_4bit_x86;
56}
diff --git a/src/lib/libcrypto/modes/modes_local.h b/src/lib/libcrypto/modes/modes_local.h
index c04db034d0..df699d3e4c 100644
--- a/src/lib/libcrypto/modes/modes_local.h
+++ b/src/lib/libcrypto/modes/modes_local.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: modes_local.h,v 1.4 2025/04/23 14:15:19 jsing Exp $ */ 1/* $OpenBSD: modes_local.h,v 1.8 2025/11/26 10:19:57 tb Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -6,6 +6,9 @@
6 * ==================================================================== 6 * ====================================================================
7 */ 7 */
8 8
9#ifndef HEADER_MODES_LOCAL_H
10#define HEADER_MODES_LOCAL_H
11
9#include <endian.h> 12#include <endian.h>
10 13
11#include <openssl/opensslconf.h> 14#include <openssl/opensslconf.h>
@@ -15,69 +18,51 @@
15__BEGIN_HIDDEN_DECLS 18__BEGIN_HIDDEN_DECLS
16 19
17#if defined(_LP64) 20#if defined(_LP64)
18typedef long i64;
19typedef unsigned long u64;
20#define U64(C) C##UL 21#define U64(C) C##UL
21#else 22#else
22typedef long long i64;
23typedef unsigned long long u64;
24#define U64(C) C##ULL 23#define U64(C) C##ULL
25#endif 24#endif
26 25
27typedef unsigned int u32;
28typedef unsigned char u8;
29
30/* GCM definitions */ 26/* GCM definitions */
31 27
32typedef struct { 28typedef struct {
33 u64 hi, lo; 29 uint64_t hi, lo;
34} u128; 30} u128;
35 31
36#ifdef TABLE_BITS
37#undef TABLE_BITS
38#endif
39/*
40 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
41 * never be set to 8 [or 1]. For further information see gcm128.c.
42 */
43#define TABLE_BITS 4
44
45struct gcm128_context { 32struct gcm128_context {
46 /* Following 6 names follow names in GCM specification */ 33 /* Following 6 names follow names in GCM specification */
47 union { 34 union {
48 u64 u[2]; 35 uint64_t u[2];
49 u32 d[4]; 36 uint32_t d[4];
50 u8 c[16]; 37 uint8_t c[16];
51 size_t t[16/sizeof(size_t)]; 38 size_t t[16/sizeof(size_t)];
52 } Yi, EKi, EK0, len, Xi, H; 39 } Yi, EKi, EK0, len, Xi, H;
53 /* Relative position of Xi, H and pre-computed Htable is used 40 /* Relative position of Xi, H and pre-computed Htable is used
54 * in some assembler modules, i.e. don't change the order! */ 41 * in some assembler modules, i.e. don't change the order! */
55#if TABLE_BITS==8
56 u128 Htable[256];
57#else
58 u128 Htable[16]; 42 u128 Htable[16];
59 void (*gmult)(u64 Xi[2], const u128 Htable[16]); 43 void (*gmult)(uint64_t Xi[2], const u128 Htable[16]);
60 void (*ghash)(u64 Xi[2], const u128 Htable[16], const u8 *inp, 44 void (*ghash)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
61 size_t len); 45 size_t len);
62#endif
63 unsigned int mres, ares; 46 unsigned int mres, ares;
64 block128_f block; 47 block128_f block;
65 void *key; 48 void *key;
66}; 49};
67 50
68struct xts128_context { 51struct xts128_context {
69 void *key1, *key2; 52 const void *key1, *key2;
70 block128_f block1, block2; 53 block128_f block1, block2;
71}; 54};
72 55
73struct ccm128_context { 56struct ccm128_context {
74 union { 57 union {
75 u64 u[2]; 58 uint64_t u[2];
76 u8 c[16]; 59 uint8_t c[16];
77 } nonce, cmac; 60 } nonce, cmac;
78 u64 blocks; 61 uint64_t blocks;
79 block128_f block; 62 block128_f block;
80 void *key; 63 void *key;
81}; 64};
82 65
83__END_HIDDEN_DECLS 66__END_HIDDEN_DECLS
67
68#endif /* HEADER_MODES_LOCAL_H */
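
With the bespoke u8/u32/u64 typedefs gone, the context fields keep their union shape: the same 16 bytes can be viewed as two 64-bit words for XORs, four 32-bit words for the counter, or raw bytes for the tail loops. A small standalone illustration of why the 32-bit view is convenient (the helper name is hypothetical; be32toh()/htobe32() come from <endian.h>, which the header already includes):

#include <stdint.h>
#include <endian.h>

union gcm_block {
	uint64_t u[2];
	uint32_t d[4];
	uint8_t c[16];
};

/*
 * Bump the 32-bit big-endian counter in the last word of Yi without
 * touching the rest of the block.
 */
static void
ctr32_increment(union gcm_block *Yi)
{
	Yi->d[3] = htobe32(be32toh(Yi->d[3]) + 1);
}
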
diff --git a/src/lib/libcrypto/modes/xts128.c b/src/lib/libcrypto/modes/xts128.c
index 789af9ef65..9c863e73d6 100644
--- a/src/lib/libcrypto/modes/xts128.c
+++ b/src/lib/libcrypto/modes/xts128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: xts128.c,v 1.14 2025/04/21 16:01:18 jsing Exp $ */ 1/* $OpenBSD: xts128.c,v 1.15 2025/05/18 09:05:59 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -61,9 +61,9 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
61 size_t len, int enc) 61 size_t len, int enc)
62{ 62{
63 union { 63 union {
64 u64 u[2]; 64 uint64_t u[2];
65 u32 d[4]; 65 uint32_t d[4];
66 u8 c[16]; 66 uint8_t c[16];
67 } tweak, scratch; 67 } tweak, scratch;
68 unsigned int i; 68 unsigned int i;
69 69
@@ -83,8 +83,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
83 scratch.u[0] ^= tweak.u[0]; 83 scratch.u[0] ^= tweak.u[0];
84 scratch.u[1] ^= tweak.u[1]; 84 scratch.u[1] ^= tweak.u[1];
85#else 85#else
86 scratch.u[0] = ((u64 *)inp)[0] ^ tweak.u[0]; 86 scratch.u[0] = ((uint64_t *)inp)[0] ^ tweak.u[0];
87 scratch.u[1] = ((u64 *)inp)[1] ^ tweak.u[1]; 87 scratch.u[1] = ((uint64_t *)inp)[1] ^ tweak.u[1];
88#endif 88#endif
89 (*ctx->block1)(scratch.c, scratch.c, ctx->key1); 89 (*ctx->block1)(scratch.c, scratch.c, ctx->key1);
90#ifdef __STRICT_ALIGNMENT 90#ifdef __STRICT_ALIGNMENT
@@ -92,8 +92,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
92 scratch.u[1] ^= tweak.u[1]; 92 scratch.u[1] ^= tweak.u[1];
93 memcpy(out, scratch.c, 16); 93 memcpy(out, scratch.c, 16);
94#else 94#else
95 ((u64 *)out)[0] = scratch.u[0] ^= tweak.u[0]; 95 ((uint64_t *)out)[0] = scratch.u[0] ^= tweak.u[0];
96 ((u64 *)out)[1] = scratch.u[1] ^= tweak.u[1]; 96 ((uint64_t *)out)[1] = scratch.u[1] ^= tweak.u[1];
97#endif 97#endif
98 inp += 16; 98 inp += 16;
99 out += 16; 99 out += 16;
@@ -115,15 +115,15 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
115 for (c = 0, i = 0; i < 16; ++i) { 115 for (c = 0, i = 0; i < 16; ++i) {
116 /*+ substitutes for |, because c is 1 bit */ 116 /*+ substitutes for |, because c is 1 bit */
117 c += ((size_t)tweak.c[i]) << 1; 117 c += ((size_t)tweak.c[i]) << 1;
118 tweak.c[i] = (u8)c; 118 tweak.c[i] = (uint8_t)c;
119 c = c >> 8; 119 c = c >> 8;
120 } 120 }
121 tweak.c[0] ^= (u8)(0x87 & (0 - c)); 121 tweak.c[0] ^= (uint8_t)(0x87 & (0 - c));
122#endif 122#endif
123 } 123 }
124 if (enc) { 124 if (enc) {
125 for (i = 0; i < len; ++i) { 125 for (i = 0; i < len; ++i) {
126 u8 ch = inp[i]; 126 uint8_t ch = inp[i];
127 out[i] = scratch.c[i]; 127 out[i] = scratch.c[i];
128 scratch.c[i] = ch; 128 scratch.c[i] = ch;
129 } 129 }
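
The encrypt-side loop above is the ciphertext-stealing swap: the first len bytes of the last full ciphertext block become the final partial block of output, while the trailing partial plaintext is moved into the scratch buffer so it can be encrypted, under the next tweak, as the new last full block. A standalone sketch of just that swap (names are illustrative, not library API):

#include <stddef.h>
#include <stdint.h>

/*
 * Illustrative stealing swap for XTS encryption with a short tail:
 * "stolen" holds the last full ciphertext block, "tail" the trailing
 * partial plaintext.  After the swap, "out_tail" carries the truncated
 * ciphertext block and "stolen" carries the plaintext tail padded with
 * ciphertext, ready to be encrypted again as the final full block.
 */
static void
xts_steal_swap(uint8_t stolen[16], const uint8_t *tail, uint8_t *out_tail,
    size_t len)
{
	size_t i;

	for (i = 0; i < len; i++) {
		uint8_t ch = tail[i];

		out_tail[i] = stolen[i];
		stolen[i] = ch;
	}
}
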
@@ -135,8 +135,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
135 memcpy(out - 16, scratch.c, 16); 135 memcpy(out - 16, scratch.c, 16);
136 } else { 136 } else {
137 union { 137 union {
138 u64 u[2]; 138 uint64_t u[2];
139 u8 c[16]; 139 uint8_t c[16];
140 } tweak1; 140 } tweak1;
141 141
142#if BYTE_ORDER == LITTLE_ENDIAN 142#if BYTE_ORDER == LITTLE_ENDIAN
@@ -152,25 +152,25 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
152 for (c = 0, i = 0; i < 16; ++i) { 152 for (c = 0, i = 0; i < 16; ++i) {
153 /*+ substitutes for |, because c is 1 bit */ 153 /*+ substitutes for |, because c is 1 bit */
154 c += ((size_t)tweak.c[i]) << 1; 154 c += ((size_t)tweak.c[i]) << 1;
155 tweak1.c[i] = (u8)c; 155 tweak1.c[i] = (uint8_t)c;
156 c = c >> 8; 156 c = c >> 8;
157 } 157 }
158 tweak1.c[0] ^= (u8)(0x87 & (0 - c)); 158 tweak1.c[0] ^= (uint8_t)(0x87 & (0 - c));
159#endif 159#endif
160#ifdef __STRICT_ALIGNMENT 160#ifdef __STRICT_ALIGNMENT
161 memcpy(scratch.c, inp, 16); 161 memcpy(scratch.c, inp, 16);
162 scratch.u[0] ^= tweak1.u[0]; 162 scratch.u[0] ^= tweak1.u[0];
163 scratch.u[1] ^= tweak1.u[1]; 163 scratch.u[1] ^= tweak1.u[1];
164#else 164#else
165 scratch.u[0] = ((u64 *)inp)[0] ^ tweak1.u[0]; 165 scratch.u[0] = ((uint64_t *)inp)[0] ^ tweak1.u[0];
166 scratch.u[1] = ((u64 *)inp)[1] ^ tweak1.u[1]; 166 scratch.u[1] = ((uint64_t *)inp)[1] ^ tweak1.u[1];
167#endif 167#endif
168 (*ctx->block1)(scratch.c, scratch.c, ctx->key1); 168 (*ctx->block1)(scratch.c, scratch.c, ctx->key1);
169 scratch.u[0] ^= tweak1.u[0]; 169 scratch.u[0] ^= tweak1.u[0];
170 scratch.u[1] ^= tweak1.u[1]; 170 scratch.u[1] ^= tweak1.u[1];
171 171
172 for (i = 0; i < len; ++i) { 172 for (i = 0; i < len; ++i) {
173 u8 ch = inp[16 + i]; 173 uint8_t ch = inp[16 + i];
174 out[16 + i] = scratch.c[i]; 174 out[16 + i] = scratch.c[i];
175 scratch.c[i] = ch; 175 scratch.c[i] = ch;
176 } 176 }
@@ -182,8 +182,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
182 scratch.u[1] ^= tweak.u[1]; 182 scratch.u[1] ^= tweak.u[1];
183 memcpy(out, scratch.c, 16); 183 memcpy(out, scratch.c, 16);
184#else 184#else
185 ((u64 *)out)[0] = scratch.u[0] ^ tweak.u[0]; 185 ((uint64_t *)out)[0] = scratch.u[0] ^ tweak.u[0];
186 ((u64 *)out)[1] = scratch.u[1] ^ tweak.u[1]; 186 ((uint64_t *)out)[1] = scratch.u[1] ^ tweak.u[1];
187#endif 187#endif
188 } 188 }
189 189
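
Between blocks the tweak is multiplied by x in GF(2^128). The byte-oriented branch shown above follows the XTS convention of treating tweak.c[0] as the least significant byte, and a carry out of the top bit is reduced with the polynomial x^128 + x^7 + x^2 + x + 1, i.e. an XOR with 0x87. A standalone version of that update for reference:

#include <stddef.h>
#include <stdint.h>

/*
 * Multiply the 128-bit XTS tweak by x, byte 0 being least significant;
 * a carry out of the top bit is folded back in with 0x87.
 */
static void
xts_tweak_double(uint8_t tweak[16])
{
	unsigned int carry = 0, i;

	for (i = 0; i < 16; i++) {
		unsigned int t = ((unsigned int)tweak[i] << 1) | carry;

		tweak[i] = (uint8_t)t;
		carry = t >> 8;
	}
	tweak[0] ^= (uint8_t)(0x87 & (0 - carry));
}
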