path: root/src/lib/libcrypto/modes
Diffstat (limited to 'src/lib/libcrypto/modes')
-rw-r--r--  src/lib/libcrypto/modes/asm/ghash-x86.pl       3
-rw-r--r--  src/lib/libcrypto/modes/ccm128.c              90
-rw-r--r--  src/lib/libcrypto/modes/ctr128.c              12
-rw-r--r--  src/lib/libcrypto/modes/gcm128.c            1070
-rw-r--r--  src/lib/libcrypto/modes/gcm128_amd64.c        44
-rw-r--r--  src/lib/libcrypto/modes/gcm128_i386.c         56
-rw-r--r--  src/lib/libcrypto/modes/modes_local.h         47
-rw-r--r--  src/lib/libcrypto/modes/xts128.c              40
8 files changed, 371 insertions, 991 deletions
diff --git a/src/lib/libcrypto/modes/asm/ghash-x86.pl b/src/lib/libcrypto/modes/asm/ghash-x86.pl
index 47833582b6..395c680cc5 100644
--- a/src/lib/libcrypto/modes/asm/ghash-x86.pl
+++ b/src/lib/libcrypto/modes/asm/ghash-x86.pl
@@ -119,8 +119,7 @@ require "x86asm.pl";
119 119
120&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); 120&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");
121 121
122$sse2=0; 122$sse2=1;
123for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
124 123
125($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx"); 124($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx");
126$inp = "edi"; 125$inp = "edi";
diff --git a/src/lib/libcrypto/modes/ccm128.c b/src/lib/libcrypto/modes/ccm128.c
index 0f592dd9e5..e27681ee62 100644
--- a/src/lib/libcrypto/modes/ccm128.c
+++ b/src/lib/libcrypto/modes/ccm128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: ccm128.c,v 1.10 2025/04/21 16:01:18 jsing Exp $ */ 1/* $OpenBSD: ccm128.c,v 1.12 2025/05/18 09:21:29 bcook Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -61,7 +61,7 @@ CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
61 unsigned int M, unsigned int L, void *key, block128_f block) 61 unsigned int M, unsigned int L, void *key, block128_f block)
62{ 62{
63 memset(ctx->nonce.c, 0, sizeof(ctx->nonce.c)); 63 memset(ctx->nonce.c, 0, sizeof(ctx->nonce.c));
64 ctx->nonce.c[0] = ((u8)(L - 1) & 7) | (u8)(((M - 2)/2) & 7) << 3; 64 ctx->nonce.c[0] = ((uint8_t)(L - 1) & 7) | (uint8_t)(((M - 2)/2) & 7) << 3;
65 ctx->blocks = 0; 65 ctx->blocks = 0;
66 ctx->block = block; 66 ctx->block = block;
67 ctx->key = key; 67 ctx->key = key;
@@ -81,17 +81,17 @@ CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx,
81 return -1; /* nonce is too short */ 81 return -1; /* nonce is too short */
82 82
83 if (sizeof(mlen) == 8 && L >= 3) { 83 if (sizeof(mlen) == 8 && L >= 3) {
84 ctx->nonce.c[8] = (u8)(mlen >> (56 % (sizeof(mlen)*8))); 84 ctx->nonce.c[8] = (uint8_t)(mlen >> (56 % (sizeof(mlen)*8)));
85 ctx->nonce.c[9] = (u8)(mlen >> (48 % (sizeof(mlen)*8))); 85 ctx->nonce.c[9] = (uint8_t)(mlen >> (48 % (sizeof(mlen)*8)));
86 ctx->nonce.c[10] = (u8)(mlen >> (40 % (sizeof(mlen)*8))); 86 ctx->nonce.c[10] = (uint8_t)(mlen >> (40 % (sizeof(mlen)*8)));
87 ctx->nonce.c[11] = (u8)(mlen >> (32 % (sizeof(mlen)*8))); 87 ctx->nonce.c[11] = (uint8_t)(mlen >> (32 % (sizeof(mlen)*8)));
88 } else 88 } else
89 ctx->nonce.u[1] = 0; 89 ctx->nonce.u[1] = 0;
90 90
91 ctx->nonce.c[12] = (u8)(mlen >> 24); 91 ctx->nonce.c[12] = (uint8_t)(mlen >> 24);
92 ctx->nonce.c[13] = (u8)(mlen >> 16); 92 ctx->nonce.c[13] = (uint8_t)(mlen >> 16);
93 ctx->nonce.c[14] = (u8)(mlen >> 8); 93 ctx->nonce.c[14] = (uint8_t)(mlen >> 8);
94 ctx->nonce.c[15] = (u8)mlen; 94 ctx->nonce.c[15] = (uint8_t)mlen;
95 95
96 ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */ 96 ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */
97 memcpy(&ctx->nonce.c[1], nonce, 14 - L); 97 memcpy(&ctx->nonce.c[1], nonce, 14 - L);
@@ -116,29 +116,29 @@ CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx,
116 ctx->blocks++; 116 ctx->blocks++;
117 117
118 if (alen < (0x10000 - 0x100)) { 118 if (alen < (0x10000 - 0x100)) {
119 ctx->cmac.c[0] ^= (u8)(alen >> 8); 119 ctx->cmac.c[0] ^= (uint8_t)(alen >> 8);
120 ctx->cmac.c[1] ^= (u8)alen; 120 ctx->cmac.c[1] ^= (uint8_t)alen;
121 i = 2; 121 i = 2;
122 } else if (sizeof(alen) == 8 && 122 } else if (sizeof(alen) == 8 &&
123 alen >= (size_t)1 << (32 % (sizeof(alen)*8))) { 123 alen >= (size_t)1 << (32 % (sizeof(alen)*8))) {
124 ctx->cmac.c[0] ^= 0xFF; 124 ctx->cmac.c[0] ^= 0xFF;
125 ctx->cmac.c[1] ^= 0xFF; 125 ctx->cmac.c[1] ^= 0xFF;
126 ctx->cmac.c[2] ^= (u8)(alen >> (56 % (sizeof(alen)*8))); 126 ctx->cmac.c[2] ^= (uint8_t)(alen >> (56 % (sizeof(alen)*8)));
127 ctx->cmac.c[3] ^= (u8)(alen >> (48 % (sizeof(alen)*8))); 127 ctx->cmac.c[3] ^= (uint8_t)(alen >> (48 % (sizeof(alen)*8)));
128 ctx->cmac.c[4] ^= (u8)(alen >> (40 % (sizeof(alen)*8))); 128 ctx->cmac.c[4] ^= (uint8_t)(alen >> (40 % (sizeof(alen)*8)));
129 ctx->cmac.c[5] ^= (u8)(alen >> (32 % (sizeof(alen)*8))); 129 ctx->cmac.c[5] ^= (uint8_t)(alen >> (32 % (sizeof(alen)*8)));
130 ctx->cmac.c[6] ^= (u8)(alen >> 24); 130 ctx->cmac.c[6] ^= (uint8_t)(alen >> 24);
131 ctx->cmac.c[7] ^= (u8)(alen >> 16); 131 ctx->cmac.c[7] ^= (uint8_t)(alen >> 16);
132 ctx->cmac.c[8] ^= (u8)(alen >> 8); 132 ctx->cmac.c[8] ^= (uint8_t)(alen >> 8);
133 ctx->cmac.c[9] ^= (u8)alen; 133 ctx->cmac.c[9] ^= (uint8_t)alen;
134 i = 10; 134 i = 10;
135 } else { 135 } else {
136 ctx->cmac.c[0] ^= 0xFF; 136 ctx->cmac.c[0] ^= 0xFF;
137 ctx->cmac.c[1] ^= 0xFE; 137 ctx->cmac.c[1] ^= 0xFE;
138 ctx->cmac.c[2] ^= (u8)(alen >> 24); 138 ctx->cmac.c[2] ^= (uint8_t)(alen >> 24);
139 ctx->cmac.c[3] ^= (u8)(alen >> 16); 139 ctx->cmac.c[3] ^= (uint8_t)(alen >> 16);
140 ctx->cmac.c[4] ^= (u8)(alen >> 8); 140 ctx->cmac.c[4] ^= (uint8_t)(alen >> 8);
141 ctx->cmac.c[5] ^= (u8)alen; 141 ctx->cmac.c[5] ^= (uint8_t)alen;
142 i = 6; 142 i = 6;
143 } 143 }
144 144
@@ -160,7 +160,7 @@ static void
160ctr64_inc(unsigned char *counter) 160ctr64_inc(unsigned char *counter)
161{ 161{
162 unsigned int n = 8; 162 unsigned int n = 8;
163 u8 c; 163 uint8_t c;
164 164
165 counter += 8; 165 counter += 8;
166 do { 166 do {
@@ -184,8 +184,8 @@ CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
184 block128_f block = ctx->block; 184 block128_f block = ctx->block;
185 void *key = ctx->key; 185 void *key = ctx->key;
186 union { 186 union {
187 u64 u[2]; 187 uint64_t u[2];
188 u8 c[16]; 188 uint8_t c[16];
189 } scratch; 189 } scratch;
190 190
191 if (!(flags0 & 0x40)) 191 if (!(flags0 & 0x40))
@@ -211,16 +211,16 @@ CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
211 while (len >= 16) { 211 while (len >= 16) {
212#ifdef __STRICT_ALIGNMENT 212#ifdef __STRICT_ALIGNMENT
213 union { 213 union {
214 u64 u[2]; 214 uint64_t u[2];
215 u8 c[16]; 215 uint8_t c[16];
216 } temp; 216 } temp;
217 217
218 memcpy(temp.c, inp, 16); 218 memcpy(temp.c, inp, 16);
219 ctx->cmac.u[0] ^= temp.u[0]; 219 ctx->cmac.u[0] ^= temp.u[0];
220 ctx->cmac.u[1] ^= temp.u[1]; 220 ctx->cmac.u[1] ^= temp.u[1];
221#else 221#else
222 ctx->cmac.u[0] ^= ((u64 *)inp)[0]; 222 ctx->cmac.u[0] ^= ((uint64_t *)inp)[0];
223 ctx->cmac.u[1] ^= ((u64 *)inp)[1]; 223 ctx->cmac.u[1] ^= ((uint64_t *)inp)[1];
224#endif 224#endif
225 (*block)(ctx->cmac.c, ctx->cmac.c, key); 225 (*block)(ctx->cmac.c, ctx->cmac.c, key);
226 (*block)(ctx->nonce.c, scratch.c, key); 226 (*block)(ctx->nonce.c, scratch.c, key);
@@ -230,8 +230,8 @@ CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
230 temp.u[1] ^= scratch.u[1]; 230 temp.u[1] ^= scratch.u[1];
231 memcpy(out, temp.c, 16); 231 memcpy(out, temp.c, 16);
232#else 232#else
233 ((u64 *)out)[0] = scratch.u[0] ^ ((u64 *)inp)[0]; 233 ((uint64_t *)out)[0] = scratch.u[0] ^ ((uint64_t *)inp)[0];
234 ((u64 *)out)[1] = scratch.u[1] ^ ((u64 *)inp)[1]; 234 ((uint64_t *)out)[1] = scratch.u[1] ^ ((uint64_t *)inp)[1];
235#endif 235#endif
236 inp += 16; 236 inp += 16;
237 out += 16; 237 out += 16;
@@ -271,8 +271,8 @@ CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
271 block128_f block = ctx->block; 271 block128_f block = ctx->block;
272 void *key = ctx->key; 272 void *key = ctx->key;
273 union { 273 union {
274 u64 u[2]; 274 uint64_t u[2];
275 u8 c[16]; 275 uint8_t c[16];
276 } scratch; 276 } scratch;
277 277
278 if (!(flags0 & 0x40)) 278 if (!(flags0 & 0x40))
@@ -293,8 +293,8 @@ CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
293 while (len >= 16) { 293 while (len >= 16) {
294#ifdef __STRICT_ALIGNMENT 294#ifdef __STRICT_ALIGNMENT
295 union { 295 union {
296 u64 u[2]; 296 uint64_t u[2];
297 u8 c[16]; 297 uint8_t c[16];
298 } temp; 298 } temp;
299#endif 299#endif
300 (*block)(ctx->nonce.c, scratch.c, key); 300 (*block)(ctx->nonce.c, scratch.c, key);
@@ -305,10 +305,10 @@ CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
305 ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]); 305 ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]);
306 memcpy(out, scratch.c, 16); 306 memcpy(out, scratch.c, 16);
307#else 307#else
308 ctx->cmac.u[0] ^= (((u64 *)out)[0] = scratch.u[0] ^ 308 ctx->cmac.u[0] ^= (((uint64_t *)out)[0] = scratch.u[0] ^
309 ((u64 *)inp)[0]); 309 ((uint64_t *)inp)[0]);
310 ctx->cmac.u[1] ^= (((u64 *)out)[1] = scratch.u[1] ^ 310 ctx->cmac.u[1] ^= (((uint64_t *)out)[1] = scratch.u[1] ^
311 ((u64 *)inp)[1]); 311 ((uint64_t *)inp)[1]);
312#endif 312#endif
313 (*block)(ctx->cmac.c, ctx->cmac.c, key); 313 (*block)(ctx->cmac.c, ctx->cmac.c, key);
314 314
@@ -363,8 +363,8 @@ CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx,
363 block128_f block = ctx->block; 363 block128_f block = ctx->block;
364 void *key = ctx->key; 364 void *key = ctx->key;
365 union { 365 union {
366 u64 u[2]; 366 uint64_t u[2];
367 u8 c[16]; 367 uint8_t c[16];
368 } scratch; 368 } scratch;
369 369
370 if (!(flags0 & 0x40)) 370 if (!(flags0 & 0x40))
@@ -430,8 +430,8 @@ CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx,
430 block128_f block = ctx->block; 430 block128_f block = ctx->block;
431 void *key = ctx->key; 431 void *key = ctx->key;
432 union { 432 union {
433 u64 u[2]; 433 uint64_t u[2];
434 u8 c[16]; 434 uint8_t c[16];
435 } scratch; 435 } scratch;
436 436
437 if (!(flags0 & 0x40)) 437 if (!(flags0 & 0x40))
diff --git a/src/lib/libcrypto/modes/ctr128.c b/src/lib/libcrypto/modes/ctr128.c
index 30563ed6e3..87d9abb355 100644
--- a/src/lib/libcrypto/modes/ctr128.c
+++ b/src/lib/libcrypto/modes/ctr128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: ctr128.c,v 1.17 2025/04/23 10:09:08 jsing Exp $ */ 1/* $OpenBSD: ctr128.c,v 1.18 2025/05/18 09:05:59 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -63,8 +63,8 @@
63static void 63static void
64ctr128_inc(unsigned char *counter) 64ctr128_inc(unsigned char *counter)
65{ 65{
66 u32 n = 16; 66 uint32_t n = 16;
67 u8 c; 67 uint8_t c;
68 68
69 do { 69 do {
70 --n; 70 --n;
@@ -175,8 +175,8 @@ LCRYPTO_ALIAS(CRYPTO_ctr128_encrypt);
175static void 175static void
176ctr96_inc(unsigned char *counter) 176ctr96_inc(unsigned char *counter)
177{ 177{
178 u32 n = 12; 178 uint32_t n = 12;
179 u8 c; 179 uint8_t c;
180 180
181 do { 181 do {
182 --n; 182 --n;
@@ -223,7 +223,7 @@ CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
223 * overflow, which is then handled by limiting the 223 * overflow, which is then handled by limiting the
224 * amount of blocks to the exact overflow point... 224 * amount of blocks to the exact overflow point...
225 */ 225 */
226 ctr32 += (u32)blocks; 226 ctr32 += (uint32_t)blocks;
227 if (ctr32 < blocks) { 227 if (ctr32 < blocks) {
228 blocks -= ctr32; 228 blocks -= ctr32;
229 ctr32 = 0; 229 ctr32 = 0;
diff --git a/src/lib/libcrypto/modes/gcm128.c b/src/lib/libcrypto/modes/gcm128.c
index 21ba9eef57..a88f589b00 100644
--- a/src/lib/libcrypto/modes/gcm128.c
+++ b/src/lib/libcrypto/modes/gcm128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: gcm128.c,v 1.35 2025/04/25 12:08:53 jsing Exp $ */ 1/* $OpenBSD: gcm128.c,v 1.55 2026/01/17 14:30:37 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -52,187 +52,16 @@
52 52
53#include <openssl/crypto.h> 53#include <openssl/crypto.h>
54 54
55#include "crypto_arch.h"
55#include "crypto_internal.h" 56#include "crypto_internal.h"
56#include "modes_local.h" 57#include "modes_local.h"
57 58
58#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) 59void
59#define REDUCE1BIT(V) \ 60gcm_init_4bit(u128 Htable[16], uint64_t H[2])
60 do { \
61 if (sizeof(size_t)==8) { \
62 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
63 V.lo = (V.hi<<63)|(V.lo>>1); \
64 V.hi = (V.hi>>1 )^T; \
65 } else { \
66 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
67 V.lo = (V.hi<<63)|(V.lo>>1); \
68 V.hi = (V.hi>>1 )^((u64)T<<32); \
69 } \
70 } while(0)
71
72/*
73 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
74 * never be set to 8. 8 is effectively reserved for testing purposes.
75 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
76 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
77 * whole spectrum of possible table driven implementations. Why? In
78 * non-"Shoup's" case memory access pattern is segmented in such manner,
79 * that it's trivial to see that cache timing information can reveal
80 * fair portion of intermediate hash value. Given that ciphertext is
81 * always available to attacker, it's possible for him to attempt to
82 * deduce secret parameter H and if successful, tamper with messages
83 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
84 * not as trivial, but there is no reason to believe that it's resistant
85 * to cache-timing attack. And the thing about "8-bit" implementation is
86 * that it consumes 16 (sixteen) times more memory, 4KB per individual
87 * key + 1KB shared. Well, on pros side it should be twice as fast as
88 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
89 * was observed to run ~75% faster, closer to 100% for commercial
90 * compilers... Yet "4-bit" procedure is preferred, because it's
91 * believed to provide better security-performance balance and adequate
92 * all-round performance. "All-round" refers to things like:
93 *
94 * - shorter setup time effectively improves overall timing for
95 * handling short messages;
96 * - larger table allocation can become unbearable because of VM
97 * subsystem penalties (for example on Windows large enough free
98 * results in VM working set trimming, meaning that consequent
99 * malloc would immediately incur working set expansion);
100 * - larger table has larger cache footprint, which can affect
101 * performance of other code paths (not necessarily even from same
102 * thread in Hyper-Threading world);
103 *
104 * Value of 1 is not appropriate for performance reasons.
105 */
106#if TABLE_BITS==8
107
108static void
109gcm_init_8bit(u128 Htable[256], u64 H[2])
110{
111 int i, j;
112 u128 V;
113
114 Htable[0].hi = 0;
115 Htable[0].lo = 0;
116 V.hi = H[0];
117 V.lo = H[1];
118
119 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
120 REDUCE1BIT(V);
121 Htable[i] = V;
122 }
123
124 for (i = 2; i < 256; i <<= 1) {
125 u128 *Hi = Htable + i, H0 = *Hi;
126 for (j = 1; j < i; ++j) {
127 Hi[j].hi = H0.hi ^ Htable[j].hi;
128 Hi[j].lo = H0.lo ^ Htable[j].lo;
129 }
130 }
131}
132
133static void
134gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
135{
136 u128 Z = { 0, 0};
137 const u8 *xi = (const u8 *)Xi + 15;
138 size_t rem, n = *xi;
139 static const size_t rem_8bit[256] = {
140 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
141 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
142 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
143 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
144 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
145 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
146 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
147 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
148 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
149 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
150 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
151 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
152 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
153 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
154 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
155 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
156 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
157 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
158 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
159 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
160 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
161 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
162 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
163 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
164 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
165 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
166 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
167 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
168 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
169 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
170 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
171 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
172 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
173 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
174 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
175 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
176 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
177 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
178 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
179 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
180 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
181 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
182 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
183 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
184 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
185 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
186 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
187 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
188 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
189 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
190 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
191 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
192 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
193 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
194 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
195 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
196 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
197 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
198 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
199 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
200 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
201 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
202 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
203 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
204
205 while (1) {
206 Z.hi ^= Htable[n].hi;
207 Z.lo ^= Htable[n].lo;
208
209 if ((u8 *)Xi == xi)
210 break;
211
212 n = *(--xi);
213
214 rem = (size_t)Z.lo & 0xff;
215 Z.lo = (Z.hi << 56)|(Z.lo >> 8);
216 Z.hi = (Z.hi >> 8);
217#if SIZE_MAX == 0xffffffffffffffff
218 Z.hi ^= rem_8bit[rem];
219#else
220 Z.hi ^= (u64)rem_8bit[rem] << 32;
221#endif
222 }
223
224 Xi[0] = htobe64(Z.hi);
225 Xi[1] = htobe64(Z.lo);
226}
227#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
228
229#elif TABLE_BITS==4
230
231static void
232gcm_init_4bit(u128 Htable[16], u64 H[2])
233{ 61{
234 u128 V; 62 u128 V;
235 int i; 63 uint64_t T;
64 int i;
236 65
237 Htable[0].hi = 0; 66 Htable[0].hi = 0;
238 Htable[0].lo = 0; 67 Htable[0].lo = 0;
@@ -240,57 +69,41 @@ gcm_init_4bit(u128 Htable[16], u64 H[2])
240 V.lo = H[1]; 69 V.lo = H[1];
241 70
242 for (Htable[8] = V, i = 4; i > 0; i >>= 1) { 71 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
243 REDUCE1BIT(V); 72 T = U64(0xe100000000000000) & (0 - (V.lo & 1));
73 V.lo = (V.hi << 63) | (V.lo >> 1);
74 V.hi = (V.hi >> 1 ) ^ T;
244 Htable[i] = V; 75 Htable[i] = V;
245 } 76 }
246 77
247 for (i = 2; i < 16; i <<= 1) { 78 for (i = 2; i < 16; i <<= 1) {
248 u128 *Hi = Htable + i; 79 u128 *Hi = Htable + i;
249 int j; 80 int j;
250 for (V = *Hi, j = 1; j < i; ++j) { 81 for (V = *Hi, j = 1; j < i; j++) {
251 Hi[j].hi = V.hi ^ Htable[j].hi; 82 Hi[j].hi = V.hi ^ Htable[j].hi;
252 Hi[j].lo = V.lo ^ Htable[j].lo; 83 Hi[j].lo = V.lo ^ Htable[j].lo;
253 } 84 }
254 } 85 }
86}
255 87
256#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm)) 88#if !defined(HAVE_GCM_GHASH_4BIT) && !defined(HAVE_GCM_GMULT_4BIT)
257 /* 89static const uint16_t rem_4bit[16] = {
258 * ARM assembler expects specific dword order in Htable. 90 0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
259 */ 91 0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
260 { 92};
261 int j;
262#if BYTE_ORDER == LITTLE_ENDIAN
263 for (j = 0; j < 16; ++j) {
264 V = Htable[j];
265 Htable[j].hi = V.lo;
266 Htable[j].lo = V.hi;
267 }
268#else /* BIG_ENDIAN */
269 for (j = 0; j < 16; ++j) {
270 V = Htable[j];
271 Htable[j].hi = V.lo << 32|V.lo >> 32;
272 Htable[j].lo = V.hi << 32|V.hi >> 32;
273 }
274#endif
275 }
276#endif 93#endif
277}
278 94
279#ifndef GHASH_ASM 95#ifdef HAVE_GCM_GMULT_4BIT
280static const size_t rem_4bit[16] = { 96void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
281 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
282 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
283 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
284 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
285 97
98#else
286static void 99static void
287gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) 100gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16])
288{ 101{
289 u128 Z; 102 u128 Z;
290 int cnt = 15; 103 int cnt = 15;
291 size_t rem, nlo, nhi; 104 size_t rem, nlo, nhi;
292 105
293 nlo = ((const u8 *)Xi)[15]; 106 nlo = ((const uint8_t *)Xi)[15];
294 nhi = nlo >> 4; 107 nhi = nlo >> 4;
295 nlo &= 0xf; 108 nlo &= 0xf;
296 109
@@ -301,29 +114,21 @@ gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
301 rem = (size_t)Z.lo & 0xf; 114 rem = (size_t)Z.lo & 0xf;
302 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 115 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
303 Z.hi = (Z.hi >> 4); 116 Z.hi = (Z.hi >> 4);
304#if SIZE_MAX == 0xffffffffffffffff 117 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
305 Z.hi ^= rem_4bit[rem];
306#else
307 Z.hi ^= (u64)rem_4bit[rem] << 32;
308#endif
309 Z.hi ^= Htable[nhi].hi; 118 Z.hi ^= Htable[nhi].hi;
310 Z.lo ^= Htable[nhi].lo; 119 Z.lo ^= Htable[nhi].lo;
311 120
312 if (--cnt < 0) 121 if (--cnt < 0)
313 break; 122 break;
314 123
315 nlo = ((const u8 *)Xi)[cnt]; 124 nlo = ((const uint8_t *)Xi)[cnt];
316 nhi = nlo >> 4; 125 nhi = nlo >> 4;
317 nlo &= 0xf; 126 nlo &= 0xf;
318 127
319 rem = (size_t)Z.lo & 0xf; 128 rem = (size_t)Z.lo & 0xf;
320 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 129 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
321 Z.hi = (Z.hi >> 4); 130 Z.hi = (Z.hi >> 4);
322#if SIZE_MAX == 0xffffffffffffffff 131 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
323 Z.hi ^= rem_4bit[rem];
324#else
325 Z.hi ^= (u64)rem_4bit[rem] << 32;
326#endif
327 Z.hi ^= Htable[nlo].hi; 132 Z.hi ^= Htable[nlo].hi;
328 Z.lo ^= Htable[nlo].lo; 133 Z.lo ^= Htable[nlo].lo;
329 } 134 }
@@ -331,26 +136,24 @@ gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
331 Xi[0] = htobe64(Z.hi); 136 Xi[0] = htobe64(Z.hi);
332 Xi[1] = htobe64(Z.lo); 137 Xi[1] = htobe64(Z.lo);
333} 138}
139#endif
334 140
335/* 141#ifdef HAVE_GCM_GHASH_4BIT
336 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for 142void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
337 * details... Compiler-generated code doesn't seem to give any 143 size_t len);
338 * performance improvement, at least not on x86[_64]. It's here 144
339 * mostly as reference and a placeholder for possible future 145#else
340 * non-trivial optimization[s]...
341 */
342static void 146static void
343gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], 147gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
344 const u8 *inp, size_t len) 148 const uint8_t *inp, size_t len)
345{ 149{
346 u128 Z; 150 u128 Z;
347 int cnt; 151 int cnt;
348 size_t rem, nlo, nhi; 152 size_t rem, nlo, nhi;
349 153
350#if 1
351 do { 154 do {
352 cnt = 15; 155 cnt = 15;
353 nlo = ((const u8 *)Xi)[15]; 156 nlo = ((const uint8_t *)Xi)[15];
354 nlo ^= inp[15]; 157 nlo ^= inp[15];
355 nhi = nlo >> 4; 158 nhi = nlo >> 4;
356 nlo &= 0xf; 159 nlo &= 0xf;
@@ -362,18 +165,14 @@ gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
362 rem = (size_t)Z.lo & 0xf; 165 rem = (size_t)Z.lo & 0xf;
363 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 166 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
364 Z.hi = (Z.hi >> 4); 167 Z.hi = (Z.hi >> 4);
365#if SIZE_MAX == 0xffffffffffffffff 168 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
366 Z.hi ^= rem_4bit[rem];
367#else
368 Z.hi ^= (u64)rem_4bit[rem] << 32;
369#endif
370 Z.hi ^= Htable[nhi].hi; 169 Z.hi ^= Htable[nhi].hi;
371 Z.lo ^= Htable[nhi].lo; 170 Z.lo ^= Htable[nhi].lo;
372 171
373 if (--cnt < 0) 172 if (--cnt < 0)
374 break; 173 break;
375 174
376 nlo = ((const u8 *)Xi)[cnt]; 175 nlo = ((const uint8_t *)Xi)[cnt];
377 nlo ^= inp[cnt]; 176 nlo ^= inp[cnt];
378 nhi = nlo >> 4; 177 nhi = nlo >> 4;
379 nlo &= 0xf; 178 nlo &= 0xf;
@@ -381,205 +180,40 @@ gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
381 rem = (size_t)Z.lo & 0xf; 180 rem = (size_t)Z.lo & 0xf;
382 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 181 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
383 Z.hi = (Z.hi >> 4); 182 Z.hi = (Z.hi >> 4);
384#if SIZE_MAX == 0xffffffffffffffff 183 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
385 Z.hi ^= rem_4bit[rem];
386#else
387 Z.hi ^= (u64)rem_4bit[rem] << 32;
388#endif
389 Z.hi ^= Htable[nlo].hi; 184 Z.hi ^= Htable[nlo].hi;
390 Z.lo ^= Htable[nlo].lo; 185 Z.lo ^= Htable[nlo].lo;
391 } 186 }
392#else
393 /*
394 * Extra 256+16 bytes per-key plus 512 bytes shared tables
395 * [should] give ~50% improvement... One could have PACK()-ed
396 * the rem_8bit even here, but the priority is to minimize
397 * cache footprint...
398 */
399 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
400 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
401 static const unsigned short rem_8bit[256] = {
402 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
403 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
404 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
405 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
406 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
407 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
408 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
409 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
410 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
411 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
412 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
413 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
414 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
415 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
416 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
417 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
418 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
419 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
420 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
421 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
422 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
423 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
424 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
425 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
426 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
427 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
428 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
429 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
430 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
431 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
432 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
433 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
434 /*
435 * This pre-processing phase slows down procedure by approximately
436 * same time as it makes each loop spin faster. In other words
437 * single block performance is approximately same as straightforward
438 * "4-bit" implementation, and then it goes only faster...
439 */
440 for (cnt = 0; cnt < 16; ++cnt) {
441 Z.hi = Htable[cnt].hi;
442 Z.lo = Htable[cnt].lo;
443 Hshr4[cnt].lo = (Z.hi << 60)|(Z.lo >> 4);
444 Hshr4[cnt].hi = (Z.hi >> 4);
445 Hshl4[cnt] = (u8)(Z.lo << 4);
446 }
447
448 do {
449 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
450 nlo = ((const u8 *)Xi)[cnt];
451 nlo ^= inp[cnt];
452 nhi = nlo >> 4;
453 nlo &= 0xf;
454
455 Z.hi ^= Htable[nlo].hi;
456 Z.lo ^= Htable[nlo].lo;
457
458 rem = (size_t)Z.lo & 0xff;
459
460 Z.lo = (Z.hi << 56)|(Z.lo >> 8);
461 Z.hi = (Z.hi >> 8);
462
463 Z.hi ^= Hshr4[nhi].hi;
464 Z.lo ^= Hshr4[nhi].lo;
465 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
466 }
467
468 nlo = ((const u8 *)Xi)[0];
469 nlo ^= inp[0];
470 nhi = nlo >> 4;
471 nlo &= 0xf;
472
473 Z.hi ^= Htable[nlo].hi;
474 Z.lo ^= Htable[nlo].lo;
475
476 rem = (size_t)Z.lo & 0xf;
477
478 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
479 Z.hi = (Z.hi >> 4);
480
481 Z.hi ^= Htable[nhi].hi;
482 Z.lo ^= Htable[nhi].lo;
483 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
484#endif
485 187
486 Xi[0] = htobe64(Z.hi); 188 Xi[0] = htobe64(Z.hi);
487 Xi[1] = htobe64(Z.lo); 189 Xi[1] = htobe64(Z.lo);
488 } while (inp += 16, len -= 16); 190 } while (inp += 16, len -= 16);
489} 191}
490#else
491void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
492void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
493 size_t len);
494#endif 192#endif
495 193
496#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) 194static inline void
497#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) 195gcm_mul(GCM128_CONTEXT *ctx, uint64_t u[2])
498/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
499 * trashing effect. In other words idea is to hash data while it's
500 * still in L1 cache after encryption pass... */
501#define GHASH_CHUNK (3*1024)
502
503#else /* TABLE_BITS */
504
505static void
506gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
507{ 196{
508 u128 V, Z = { 0, 0 }; 197 ctx->gmult(u, ctx->Htable);
509 u64 X;
510 int i, j;
511
512 V.hi = H[0]; /* H is in host byte order, no byte swapping */
513 V.lo = H[1];
514
515 for (j = 0; j < 2; j++) {
516 X = be64toh(Xi[j]);
517
518 for (i = 0; i < 64; i++) {
519 u64 M = 0 - (X >> 63);
520 Z.hi ^= V.hi & M;
521 Z.lo ^= V.lo & M;
522 X <<= 1;
523
524 REDUCE1BIT(V);
525 }
526 }
527
528 Xi[0] = htobe64(Z.hi);
529 Xi[1] = htobe64(Z.lo);
530} 198}
531#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
532
533#endif
534
535#if defined(GHASH_ASM) && \
536 (defined(__i386) || defined(__i386__) || \
537 defined(__x86_64) || defined(__x86_64__) || \
538 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
539#include "x86_arch.h"
540#endif
541
542#if TABLE_BITS==4 && defined(GHASH_ASM)
543# if (defined(__i386) || defined(__i386__) || \
544 defined(__x86_64) || defined(__x86_64__) || \
545 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
546# define GHASH_ASM_X86_OR_64
547# define GCM_FUNCREF_4BIT
548 199
549void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]); 200static inline void
550void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); 201gcm_ghash(GCM128_CONTEXT *ctx, const uint8_t *in, size_t len)
551void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, 202{
552 size_t len); 203 ctx->ghash(ctx->Xi.u, ctx->Htable, in, len);
204}
553 205
554# if defined(__i386) || defined(__i386__) || defined(_M_IX86) 206#ifdef HAVE_GCM128_INIT
555# define GHASH_ASM_X86 207void gcm128_init(GCM128_CONTEXT *ctx);
556void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
557void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
558 size_t len);
559 208
560void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); 209#else
561void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, 210static void
562 size_t len); 211gcm128_init(GCM128_CONTEXT *ctx)
563# endif 212{
564# elif defined(__arm__) || defined(__arm) 213 gcm_init_4bit(ctx->Htable, ctx->H.u);
565# include "arm_arch.h" 214 ctx->gmult = gcm_gmult_4bit;
566# if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) 215 ctx->ghash = gcm_ghash_4bit;
567# define GHASH_ASM_ARM 216}
568# define GCM_FUNCREF_4BIT
569void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
570void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
571 size_t len);
572# endif
573# endif
574#endif
575
576#ifdef GCM_FUNCREF_4BIT
577# undef GCM_MUL
578# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
579# ifdef GHASH
580# undef GHASH
581# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
582# endif
583#endif 217#endif
584 218
585void 219void
@@ -595,60 +229,35 @@ CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
595 ctx->H.u[0] = be64toh(ctx->H.u[0]); 229 ctx->H.u[0] = be64toh(ctx->H.u[0]);
596 ctx->H.u[1] = be64toh(ctx->H.u[1]); 230 ctx->H.u[1] = be64toh(ctx->H.u[1]);
597 231
598#if TABLE_BITS==8 232 gcm128_init(ctx);
599 gcm_init_8bit(ctx->Htable, ctx->H.u);
600#elif TABLE_BITS==4
601# if defined(GHASH_ASM_X86_OR_64)
602# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
603 /* check FXSR and PCLMULQDQ bits */
604 if ((crypto_cpu_caps_ia32() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) ==
605 (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) {
606 gcm_init_clmul(ctx->Htable, ctx->H.u);
607 ctx->gmult = gcm_gmult_clmul;
608 ctx->ghash = gcm_ghash_clmul;
609 return;
610 }
611# endif
612 gcm_init_4bit(ctx->Htable, ctx->H.u);
613# if defined(GHASH_ASM_X86) /* x86 only */
614# if defined(OPENSSL_IA32_SSE2)
615 if (crypto_cpu_caps_ia32() & CPUCAP_MASK_SSE) { /* check SSE bit */
616# else
617 if (crypto_cpu_caps_ia32() & CPUCAP_MASK_MMX) { /* check MMX bit */
618# endif
619 ctx->gmult = gcm_gmult_4bit_mmx;
620 ctx->ghash = gcm_ghash_4bit_mmx;
621 } else {
622 ctx->gmult = gcm_gmult_4bit_x86;
623 ctx->ghash = gcm_ghash_4bit_x86;
624 }
625# else
626 ctx->gmult = gcm_gmult_4bit;
627 ctx->ghash = gcm_ghash_4bit;
628# endif
629# elif defined(GHASH_ASM_ARM)
630 if (OPENSSL_armcap_P & ARMV7_NEON) {
631 ctx->gmult = gcm_gmult_neon;
632 ctx->ghash = gcm_ghash_neon;
633 } else {
634 gcm_init_4bit(ctx->Htable, ctx->H.u);
635 ctx->gmult = gcm_gmult_4bit;
636 ctx->ghash = gcm_ghash_4bit;
637 }
638# else
639 gcm_init_4bit(ctx->Htable, ctx->H.u);
640# endif
641#endif
642} 233}
643LCRYPTO_ALIAS(CRYPTO_gcm128_init); 234LCRYPTO_ALIAS(CRYPTO_gcm128_init);
644 235
236GCM128_CONTEXT *
237CRYPTO_gcm128_new(void *key, block128_f block)
238{
239 GCM128_CONTEXT *ctx;
240
241 if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
242 return NULL;
243
244 CRYPTO_gcm128_init(ctx, key, block);
245
246 return ctx;
247}
248LCRYPTO_ALIAS(CRYPTO_gcm128_new);
249
250void
251CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
252{
253 freezero(ctx, sizeof(*ctx));
254}
255LCRYPTO_ALIAS(CRYPTO_gcm128_release);
256
645void 257void
646CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len) 258CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len)
647{ 259{
648 unsigned int ctr; 260 unsigned int ctr;
649#ifdef GCM_FUNCREF_4BIT
650 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
651#endif
652 261
653 ctx->Yi.u[0] = 0; 262 ctx->Yi.u[0] = 0;
654 ctx->Yi.u[1] = 0; 263 ctx->Yi.u[1] = 0;
@@ -665,573 +274,277 @@ CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len)
665 ctr = 1; 274 ctr = 1;
666 } else { 275 } else {
667 size_t i; 276 size_t i;
668 u64 len0 = len; 277 uint64_t len0 = len;
669 278
670 while (len >= 16) { 279 while (len >= 16) {
671 for (i = 0; i < 16; ++i) 280 for (i = 0; i < 16; i++)
672 ctx->Yi.c[i] ^= iv[i]; 281 ctx->Yi.c[i] ^= iv[i];
673 GCM_MUL(ctx, Yi); 282 gcm_mul(ctx, ctx->Yi.u);
674 iv += 16; 283 iv += 16;
675 len -= 16; 284 len -= 16;
676 } 285 }
677 if (len) { 286 if (len > 0) {
678 for (i = 0; i < len; ++i) 287 for (i = 0; i < len; i++)
679 ctx->Yi.c[i] ^= iv[i]; 288 ctx->Yi.c[i] ^= iv[i];
680 GCM_MUL(ctx, Yi); 289 gcm_mul(ctx, ctx->Yi.u);
681 } 290 }
682 len0 <<= 3; 291 len0 <<= 3;
683 ctx->Yi.u[1] ^= htobe64(len0); 292 ctx->Yi.u[1] ^= htobe64(len0);
684 293
685 GCM_MUL(ctx, Yi); 294 gcm_mul(ctx, ctx->Yi.u);
686 295
687 ctr = be32toh(ctx->Yi.d[3]); 296 ctr = be32toh(ctx->Yi.d[3]);
688 } 297 }
689 298
690 (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key); 299 (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
691 ++ctr; 300 ctx->Yi.d[3] = htobe32(++ctr);
692 ctx->Yi.d[3] = htobe32(ctr);
693} 301}
694LCRYPTO_ALIAS(CRYPTO_gcm128_setiv); 302LCRYPTO_ALIAS(CRYPTO_gcm128_setiv);
695 303
696int 304int
697CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, size_t len) 305CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, size_t len)
698{ 306{
699 size_t i;
700 unsigned int n; 307 unsigned int n;
701 u64 alen = ctx->len.u[0]; 308 uint64_t alen;
702#ifdef GCM_FUNCREF_4BIT 309 size_t i;
703 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
704# ifdef GHASH
705 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
706 const u8 *inp, size_t len) = ctx->ghash;
707# endif
708#endif
709 310
710 if (ctx->len.u[1]) 311 if (ctx->len.u[1] != 0)
711 return -2; 312 return -2;
712 313
713 alen += len; 314 alen = ctx->len.u[0] + len;
714 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len)) 315 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
715 return -1; 316 return -1;
716 ctx->len.u[0] = alen; 317 ctx->len.u[0] = alen;
717 318
718 n = ctx->ares; 319 if ((n = ctx->ares) > 0) {
719 if (n) { 320 while (n > 0 && len > 0) {
720 while (n && len) {
721 ctx->Xi.c[n] ^= *(aad++); 321 ctx->Xi.c[n] ^= *(aad++);
722 --len;
723 n = (n + 1) % 16; 322 n = (n + 1) % 16;
323 len--;
724 } 324 }
725 if (n == 0) 325 if (n > 0) {
726 GCM_MUL(ctx, Xi);
727 else {
728 ctx->ares = n; 326 ctx->ares = n;
729 return 0; 327 return 0;
730 } 328 }
329 gcm_mul(ctx, ctx->Xi.u);
731 } 330 }
732 331
733#ifdef GHASH 332 if ((i = (len & (size_t)-16)) > 0) {
734 if ((i = (len & (size_t)-16))) { 333 gcm_ghash(ctx, aad, i);
735 GHASH(ctx, aad, i);
736 aad += i; 334 aad += i;
737 len -= i; 335 len -= i;
738 } 336 }
739#else 337 if (len > 0) {
740 while (len >= 16) {
741 for (i = 0; i < 16; ++i)
742 ctx->Xi.c[i] ^= aad[i];
743 GCM_MUL(ctx, Xi);
744 aad += 16;
745 len -= 16;
746 }
747#endif
748 if (len) {
749 n = (unsigned int)len; 338 n = (unsigned int)len;
750 for (i = 0; i < len; ++i) 339 for (i = 0; i < len; i++)
751 ctx->Xi.c[i] ^= aad[i]; 340 ctx->Xi.c[i] ^= aad[i];
752 } 341 }
753
754 ctx->ares = n; 342 ctx->ares = n;
343
755 return 0; 344 return 0;
756} 345}
757LCRYPTO_ALIAS(CRYPTO_gcm128_aad); 346LCRYPTO_ALIAS(CRYPTO_gcm128_aad);
758 347
759int 348int
760CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, 349CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
761 const unsigned char *in, unsigned char *out, 350 unsigned char *out, size_t len)
762 size_t len)
763{ 351{
764 unsigned int n, ctr; 352 unsigned int n, ctr;
353 uint64_t mlen;
765 size_t i; 354 size_t i;
766 u64 mlen = ctx->len.u[1];
767 block128_f block = ctx->block;
768 void *key = ctx->key;
769#ifdef GCM_FUNCREF_4BIT
770 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
771# ifdef GHASH
772 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
773 const u8 *inp, size_t len) = ctx->ghash;
774# endif
775#endif
776 355
777 mlen += len; 356 mlen = ctx->len.u[1] + len;
778 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 357 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
779 return -1; 358 return -1;
780 ctx->len.u[1] = mlen; 359 ctx->len.u[1] = mlen;
781 360
782 if (ctx->ares) { 361 if (ctx->ares > 0) {
783 /* First call to encrypt finalizes GHASH(AAD) */ 362 /* First call to encrypt finalizes GHASH(AAD) */
784 GCM_MUL(ctx, Xi); 363 gcm_mul(ctx, ctx->Xi.u);
785 ctx->ares = 0; 364 ctx->ares = 0;
786 } 365 }
787 366
788 ctr = be32toh(ctx->Yi.d[3]); 367 ctr = be32toh(ctx->Yi.d[3]);
789 368
790 n = ctx->mres; 369 n = ctx->mres;
791 if (16 % sizeof(size_t) == 0)
792 do { /* always true actually */
793 if (n) {
794 while (n && len) {
795 ctx->Xi.c[n] ^= *(out++) = *(in++) ^
796 ctx->EKi.c[n];
797 --len;
798 n = (n + 1) % 16;
799 }
800 if (n == 0)
801 GCM_MUL(ctx, Xi);
802 else {
803 ctx->mres = n;
804 return 0;
805 }
806 }
807#ifdef __STRICT_ALIGNMENT
808 if (((size_t)in|(size_t)out) % sizeof(size_t) != 0)
809 break;
810#endif
811#if defined(GHASH) && defined(GHASH_CHUNK)
812 while (len >= GHASH_CHUNK) {
813 size_t j = GHASH_CHUNK;
814
815 while (j) {
816 size_t *out_t = (size_t *)out;
817 const size_t *in_t = (const size_t *)in;
818
819 (*block)(ctx->Yi.c, ctx->EKi.c, key);
820 ++ctr;
821 ctx->Yi.d[3] = htobe32(ctr);
822
823 for (i = 0; i < 16/sizeof(size_t); ++i)
824 out_t[i] = in_t[i] ^
825 ctx->EKi.t[i];
826 out += 16;
827 in += 16;
828 j -= 16;
829 }
830 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
831 len -= GHASH_CHUNK;
832 }
833 if ((i = (len & (size_t)-16))) {
834 size_t j = i;
835
836 while (len >= 16) {
837 size_t *out_t = (size_t *)out;
838 const size_t *in_t = (const size_t *)in;
839
840 (*block)(ctx->Yi.c, ctx->EKi.c, key);
841 ++ctr;
842 ctx->Yi.d[3] = htobe32(ctr);
843
844 for (i = 0; i < 16/sizeof(size_t); ++i)
845 out_t[i] = in_t[i] ^
846 ctx->EKi.t[i];
847 out += 16;
848 in += 16;
849 len -= 16;
850 }
851 GHASH(ctx, out - j, j);
852 }
853#else
854 while (len >= 16) {
855 size_t *out_t = (size_t *)out;
856 const size_t *in_t = (const size_t *)in;
857
858 (*block)(ctx->Yi.c, ctx->EKi.c, key);
859 ++ctr;
860 ctx->Yi.d[3] = htobe32(ctr);
861
862 for (i = 0; i < 16/sizeof(size_t); ++i)
863 ctx->Xi.t[i] ^=
864 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
865 GCM_MUL(ctx, Xi);
866 out += 16;
867 in += 16;
868 len -= 16;
869 }
870#endif
871 if (len) {
872 (*block)(ctx->Yi.c, ctx->EKi.c, key);
873 ++ctr;
874 ctx->Yi.d[3] = htobe32(ctr);
875
876 while (len--) {
877 ctx->Xi.c[n] ^= out[n] = in[n] ^
878 ctx->EKi.c[n];
879 ++n;
880 }
881 }
882 370
883 ctx->mres = n; 371 for (i = 0; i < len; i++) {
884 return 0;
885 } while (0);
886 for (i = 0; i < len; ++i) {
887 if (n == 0) { 372 if (n == 0) {
888 (*block)(ctx->Yi.c, ctx->EKi.c, key); 373 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
889 ++ctr; 374 ctx->Yi.d[3] = htobe32(++ctr);
890 ctx->Yi.d[3] = htobe32(ctr);
891 } 375 }
892 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; 376 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
893 n = (n + 1) % 16; 377 n = (n + 1) % 16;
894 if (n == 0) 378 if (n == 0)
895 GCM_MUL(ctx, Xi); 379 gcm_mul(ctx, ctx->Xi.u);
896 } 380 }
897 381
898 ctx->mres = n; 382 ctx->mres = n;
383
899 return 0; 384 return 0;
900} 385}
901LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt); 386LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt);
902 387
903int 388int
904CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, 389CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
905 const unsigned char *in, unsigned char *out, 390 unsigned char *out, size_t len)
906 size_t len)
907{ 391{
908 unsigned int n, ctr; 392 unsigned int n, ctr;
393 uint64_t mlen;
394 uint8_t c;
909 size_t i; 395 size_t i;
910 u64 mlen = ctx->len.u[1];
911 block128_f block = ctx->block;
912 void *key = ctx->key;
913#ifdef GCM_FUNCREF_4BIT
914 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
915# ifdef GHASH
916 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
917 const u8 *inp, size_t len) = ctx->ghash;
918# endif
919#endif
920 396
921 mlen += len; 397 mlen = ctx->len.u[1] + len;
922 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 398 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
923 return -1; 399 return -1;
924 ctx->len.u[1] = mlen; 400 ctx->len.u[1] = mlen;
925 401
926 if (ctx->ares) { 402 if (ctx->ares) {
927 /* First call to decrypt finalizes GHASH(AAD) */ 403 /* First call to decrypt finalizes GHASH(AAD) */
928 GCM_MUL(ctx, Xi); 404 gcm_mul(ctx, ctx->Xi.u);
929 ctx->ares = 0; 405 ctx->ares = 0;
930 } 406 }
931 407
932 ctr = be32toh(ctx->Yi.d[3]); 408 ctr = be32toh(ctx->Yi.d[3]);
933 409
934 n = ctx->mres; 410 n = ctx->mres;
935 if (16 % sizeof(size_t) == 0)
936 do { /* always true actually */
937 if (n) {
938 while (n && len) {
939 u8 c = *(in++);
940 *(out++) = c ^ ctx->EKi.c[n];
941 ctx->Xi.c[n] ^= c;
942 --len;
943 n = (n + 1) % 16;
944 }
945 if (n == 0)
946 GCM_MUL(ctx, Xi);
947 else {
948 ctx->mres = n;
949 return 0;
950 }
951 }
952#ifdef __STRICT_ALIGNMENT
953 if (((size_t)in|(size_t)out) % sizeof(size_t) != 0)
954 break;
955#endif
956#if defined(GHASH) && defined(GHASH_CHUNK)
957 while (len >= GHASH_CHUNK) {
958 size_t j = GHASH_CHUNK;
959
960 GHASH(ctx, in, GHASH_CHUNK);
961 while (j) {
962 size_t *out_t = (size_t *)out;
963 const size_t *in_t = (const size_t *)in;
964
965 (*block)(ctx->Yi.c, ctx->EKi.c, key);
966 ++ctr;
967 ctx->Yi.d[3] = htobe32(ctr);
968
969 for (i = 0; i < 16/sizeof(size_t); ++i)
970 out_t[i] = in_t[i] ^
971 ctx->EKi.t[i];
972 out += 16;
973 in += 16;
974 j -= 16;
975 }
976 len -= GHASH_CHUNK;
977 }
978 if ((i = (len & (size_t)-16))) {
979 GHASH(ctx, in, i);
980 while (len >= 16) {
981 size_t *out_t = (size_t *)out;
982 const size_t *in_t = (const size_t *)in;
983
984 (*block)(ctx->Yi.c, ctx->EKi.c, key);
985 ++ctr;
986 ctx->Yi.d[3] = htobe32(ctr);
987
988 for (i = 0; i < 16/sizeof(size_t); ++i)
989 out_t[i] = in_t[i] ^
990 ctx->EKi.t[i];
991 out += 16;
992 in += 16;
993 len -= 16;
994 }
995 }
996#else
997 while (len >= 16) {
998 size_t *out_t = (size_t *)out;
999 const size_t *in_t = (const size_t *)in;
1000
1001 (*block)(ctx->Yi.c, ctx->EKi.c, key);
1002 ++ctr;
1003 ctx->Yi.d[3] = htobe32(ctr);
1004
1005 for (i = 0; i < 16/sizeof(size_t); ++i) {
1006 size_t c = in_t[i];
1007 out_t[i] = c ^ ctx->EKi.t[i];
1008 ctx->Xi.t[i] ^= c;
1009 }
1010 GCM_MUL(ctx, Xi);
1011 out += 16;
1012 in += 16;
1013 len -= 16;
1014 }
1015#endif
1016 if (len) {
1017 (*block)(ctx->Yi.c, ctx->EKi.c, key);
1018 ++ctr;
1019 ctx->Yi.d[3] = htobe32(ctr);
1020
1021 while (len--) {
1022 u8 c = in[n];
1023 ctx->Xi.c[n] ^= c;
1024 out[n] = c ^ ctx->EKi.c[n];
1025 ++n;
1026 }
1027 }
1028 411
1029 ctx->mres = n; 412 for (i = 0; i < len; i++) {
1030 return 0;
1031 } while (0);
1032 for (i = 0; i < len; ++i) {
1033 u8 c;
1034 if (n == 0) { 413 if (n == 0) {
1035 (*block)(ctx->Yi.c, ctx->EKi.c, key); 414 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
1036 ++ctr; 415 ctx->Yi.d[3] = htobe32(++ctr);
1037 ctx->Yi.d[3] = htobe32(ctr);
1038 } 416 }
1039 c = in[i]; 417 c = in[i];
1040 out[i] = c ^ ctx->EKi.c[n]; 418 out[i] = c ^ ctx->EKi.c[n];
1041 ctx->Xi.c[n] ^= c; 419 ctx->Xi.c[n] ^= c;
1042 n = (n + 1) % 16; 420 n = (n + 1) % 16;
1043 if (n == 0) 421 if (n == 0)
1044 GCM_MUL(ctx, Xi); 422 gcm_mul(ctx, ctx->Xi.u);
1045 } 423 }
1046 424
1047 ctx->mres = n; 425 ctx->mres = n;
426
1048 return 0; 427 return 0;
1049} 428}
1050LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt); 429LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt);
1051 430
1052int 431int
1053CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, 432CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const unsigned char *in,
1054 const unsigned char *in, unsigned char *out, 433 unsigned char *out, size_t len, ctr128_f stream)
1055 size_t len, ctr128_f stream)
1056{ 434{
1057 unsigned int n, ctr; 435 unsigned int n, ctr;
1058 size_t i; 436 uint64_t mlen;
1059 u64 mlen = ctx->len.u[1]; 437 size_t i, j;
1060 void *key = ctx->key;
1061#ifdef GCM_FUNCREF_4BIT
1062 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1063# ifdef GHASH
1064 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1065 const u8 *inp, size_t len) = ctx->ghash;
1066# endif
1067#endif
1068 438
1069 mlen += len; 439 mlen = ctx->len.u[1] + len;
1070 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 440 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1071 return -1; 441 return -1;
1072 ctx->len.u[1] = mlen; 442 ctx->len.u[1] = mlen;
1073 443
1074 if (ctx->ares) { 444 if (ctx->ares > 0) {
1075 /* First call to encrypt finalizes GHASH(AAD) */ 445 /* First call to encrypt finalizes GHASH(AAD) */
1076 GCM_MUL(ctx, Xi); 446 gcm_mul(ctx, ctx->Xi.u);
1077 ctx->ares = 0; 447 ctx->ares = 0;
1078 } 448 }
1079 449
1080 ctr = be32toh(ctx->Yi.d[3]); 450 ctr = be32toh(ctx->Yi.d[3]);
1081 451
1082 n = ctx->mres; 452 if ((n = ctx->mres) > 0) {
1083 if (n) { 453 while (n > 0 && len > 0) {
1084 while (n && len) {
1085 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; 454 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1086 --len;
1087 n = (n + 1) % 16; 455 n = (n + 1) % 16;
456 len--;
1088 } 457 }
1089 if (n == 0) 458 if (n > 0) {
1090 GCM_MUL(ctx, Xi);
1091 else {
1092 ctx->mres = n; 459 ctx->mres = n;
1093 return 0; 460 return 0;
1094 } 461 }
462 gcm_mul(ctx, ctx->Xi.u);
1095 } 463 }
1096#if defined(GHASH) && defined(GHASH_CHUNK) 464 if ((i = (len & (size_t)-16)) > 0) {
1097 while (len >= GHASH_CHUNK) { 465 j = i / 16;
1098 (*stream)(in, out, GHASH_CHUNK/16, key, ctx->Yi.c); 466 stream(in, out, j, ctx->key, ctx->Yi.c);
1099 ctr += GHASH_CHUNK/16;
1100 ctx->Yi.d[3] = htobe32(ctr);
1101 GHASH(ctx, out, GHASH_CHUNK);
1102 out += GHASH_CHUNK;
1103 in += GHASH_CHUNK;
1104 len -= GHASH_CHUNK;
1105 }
1106#endif
1107 if ((i = (len & (size_t)-16))) {
1108 size_t j = i/16;
1109
1110 (*stream)(in, out, j, key, ctx->Yi.c);
1111 ctr += (unsigned int)j; 467 ctr += (unsigned int)j;
1112 ctx->Yi.d[3] = htobe32(ctr); 468 ctx->Yi.d[3] = htobe32(ctr);
469 gcm_ghash(ctx, out, i);
1113 in += i; 470 in += i;
1114 len -= i;
1115#if defined(GHASH)
1116 GHASH(ctx, out, i);
1117 out += i; 471 out += i;
1118#else 472 len -= i;
1119 while (j--) {
1120 for (i = 0; i < 16; ++i)
1121 ctx->Xi.c[i] ^= out[i];
1122 GCM_MUL(ctx, Xi);
1123 out += 16;
1124 }
1125#endif
1126 } 473 }
1127 if (len) { 474 if (len > 0) {
1128 (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key); 475 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
1129 ++ctr; 476 ctx->Yi.d[3] = htobe32(++ctr);
1130 ctx->Yi.d[3] = htobe32(ctr); 477 while (len-- > 0) {
1131 while (len--) {
1132 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; 478 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1133 ++n; 479 n++;
1134 } 480 }
1135 } 481 }
1136 482
1137 ctx->mres = n; 483 ctx->mres = n;
484
1138 return 0; 485 return 0;
1139} 486}
1140LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt_ctr32); 487LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt_ctr32);
1141 488
1142int 489int
1143CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, 490CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const unsigned char *in,
1144 const unsigned char *in, unsigned char *out, 491 unsigned char *out, size_t len, ctr128_f stream)
1145 size_t len, ctr128_f stream)
1146{ 492{
1147 unsigned int n, ctr; 493 unsigned int n, ctr;
1148 size_t i; 494 uint64_t mlen;
1149 u64 mlen = ctx->len.u[1]; 495 size_t i, j;
1150 void *key = ctx->key; 496 uint8_t c;
1151#ifdef GCM_FUNCREF_4BIT
1152 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1153# ifdef GHASH
1154 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1155 const u8 *inp, size_t len) = ctx->ghash;
1156# endif
1157#endif
1158 497
1159 mlen += len; 498 mlen = ctx->len.u[1] + len;
1160 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 499 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1161 return -1; 500 return -1;
1162 ctx->len.u[1] = mlen; 501 ctx->len.u[1] = mlen;
1163 502
1164 if (ctx->ares) { 503 if (ctx->ares > 0) {
1165 /* First call to decrypt finalizes GHASH(AAD) */ 504 /* First call to decrypt finalizes GHASH(AAD) */
1166 GCM_MUL(ctx, Xi); 505 gcm_mul(ctx, ctx->Xi.u);
1167 ctx->ares = 0; 506 ctx->ares = 0;
1168 } 507 }
1169 508
1170 ctr = be32toh(ctx->Yi.d[3]); 509 ctr = be32toh(ctx->Yi.d[3]);
1171 510
1172 n = ctx->mres; 511 if ((n = ctx->mres) > 0) {
1173 if (n) { 512 while (n > 0 && len > 0) {
1174 while (n && len) { 513 c = *(in++);
1175 u8 c = *(in++);
1176 *(out++) = c ^ ctx->EKi.c[n]; 514 *(out++) = c ^ ctx->EKi.c[n];
1177 ctx->Xi.c[n] ^= c; 515 ctx->Xi.c[n] ^= c;
1178 --len;
1179 n = (n + 1) % 16; 516 n = (n + 1) % 16;
517 len--;
1180 } 518 }
1181 if (n == 0) 519 if (n > 0) {
1182 GCM_MUL(ctx, Xi);
1183 else {
1184 ctx->mres = n; 520 ctx->mres = n;
1185 return 0; 521 return 0;
1186 } 522 }
523 gcm_mul(ctx, ctx->Xi.u);
1187 } 524 }
1188#if defined(GHASH) && defined(GHASH_CHUNK) 525 if ((i = (len & (size_t)-16)) > 0) {
1189 while (len >= GHASH_CHUNK) { 526 j = i / 16;
1190 GHASH(ctx, in, GHASH_CHUNK); 527 gcm_ghash(ctx, in, i);
1191 (*stream)(in, out, GHASH_CHUNK/16, key, ctx->Yi.c); 528 stream(in, out, j, ctx->key, ctx->Yi.c);
1192 ctr += GHASH_CHUNK/16;
1193 ctx->Yi.d[3] = htobe32(ctr);
1194 out += GHASH_CHUNK;
1195 in += GHASH_CHUNK;
1196 len -= GHASH_CHUNK;
1197 }
1198#endif
1199 if ((i = (len & (size_t)-16))) {
1200 size_t j = i/16;
1201
1202#if defined(GHASH)
1203 GHASH(ctx, in, i);
1204#else
1205 while (j--) {
1206 size_t k;
1207 for (k = 0; k < 16; ++k)
1208 ctx->Xi.c[k] ^= in[k];
1209 GCM_MUL(ctx, Xi);
1210 in += 16;
1211 }
1212 j = i/16;
1213 in -= i;
1214#endif
1215 (*stream)(in, out, j, key, ctx->Yi.c);
1216 ctr += (unsigned int)j; 529 ctr += (unsigned int)j;
1217 ctx->Yi.d[3] = htobe32(ctr); 530 ctx->Yi.d[3] = htobe32(ctr);
1218 out += i;
1219 in += i; 531 in += i;
532 out += i;
1220 len -= i; 533 len -= i;
1221 } 534 }
1222 if (len) { 535 if (len > 0) {
1223 (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key); 536 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
1224 ++ctr; 537 ctx->Yi.d[3] = htobe32(++ctr);
1225 ctx->Yi.d[3] = htobe32(ctr); 538 while (len-- > 0) {
1226 while (len--) { 539 c = in[n];
1227 u8 c = in[n];
1228 ctx->Xi.c[n] ^= c; 540 ctx->Xi.c[n] ^= c;
1229 out[n] = c ^ ctx->EKi.c[n]; 541 out[n] = c ^ ctx->EKi.c[n];
1230 ++n; 542 n++;
1231 } 543 }
1232 } 544 }
1233 545
1234 ctx->mres = n; 546 ctx->mres = n;
547
1235 return 0; 548 return 0;
1236} 549}
1237LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt_ctr32); 550LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt_ctr32);
@@ -1240,26 +553,25 @@ int
1240CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, 553CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1241 size_t len) 554 size_t len)
1242{ 555{
1243 u64 alen = ctx->len.u[0] << 3; 556 uint64_t alen, clen;
1244 u64 clen = ctx->len.u[1] << 3;
1245#ifdef GCM_FUNCREF_4BIT
1246 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1247#endif
1248 557
1249 if (ctx->mres || ctx->ares) 558 alen = ctx->len.u[0] << 3;
1250 GCM_MUL(ctx, Xi); 559 clen = ctx->len.u[1] << 3;
560
561 if (ctx->ares > 0 || ctx->mres > 0)
562 gcm_mul(ctx, ctx->Xi.u);
1251 563
1252 ctx->Xi.u[0] ^= htobe64(alen); 564 ctx->Xi.u[0] ^= htobe64(alen);
1253 ctx->Xi.u[1] ^= htobe64(clen); 565 ctx->Xi.u[1] ^= htobe64(clen);
1254 GCM_MUL(ctx, Xi); 566 gcm_mul(ctx, ctx->Xi.u);
1255 567
1256 ctx->Xi.u[0] ^= ctx->EK0.u[0]; 568 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1257 ctx->Xi.u[1] ^= ctx->EK0.u[1]; 569 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1258 570
1259 if (tag && len <= sizeof(ctx->Xi)) 571 if (tag == NULL || len > sizeof(ctx->Xi))
1260 return memcmp(ctx->Xi.c, tag, len);
1261 else
1262 return -1; 572 return -1;
573
574 return timingsafe_memcmp(ctx->Xi.c, tag, len);
1263} 575}
1264LCRYPTO_ALIAS(CRYPTO_gcm128_finish); 576LCRYPTO_ALIAS(CRYPTO_gcm128_finish);
1265 577
@@ -1267,26 +579,10 @@ void
1267CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) 579CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1268{ 580{
1269 CRYPTO_gcm128_finish(ctx, NULL, 0); 581 CRYPTO_gcm128_finish(ctx, NULL, 0);
1270 memcpy(tag, ctx->Xi.c,
1271 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1272}
1273LCRYPTO_ALIAS(CRYPTO_gcm128_tag);
1274 582
1275GCM128_CONTEXT * 583 if (len > sizeof(ctx->Xi.c))
1276CRYPTO_gcm128_new(void *key, block128_f block) 584 len = sizeof(ctx->Xi.c);
1277{
1278 GCM128_CONTEXT *ret;
1279
1280 if ((ret = malloc(sizeof(GCM128_CONTEXT))))
1281 CRYPTO_gcm128_init(ret, key, block);
1282
1283 return ret;
1284}
1285LCRYPTO_ALIAS(CRYPTO_gcm128_new);
1286 585
1287void 586 memcpy(tag, ctx->Xi.c, len);
1288CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1289{
1290 freezero(ctx, sizeof(*ctx));
1291} 587}
1292LCRYPTO_ALIAS(CRYPTO_gcm128_release); 588LCRYPTO_ALIAS(CRYPTO_gcm128_tag);
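CRYPTO_gcm128_tag() clamps the requested length to sizeof(Xi) before copying, and CRYPTO_gcm128_finish() drives tag verification. A caller-side sketch of the one-shot API with AES-128 as the block cipher; it assumes the internal modes_local.h definition of GCM128_CONTEXT is visible (as it is for in-tree users such as EVP), since the public header keeps the struct opaque. The helper name aes128_gcm_seal is illustrative:

#include <string.h>

#include <openssl/aes.h>
#include <openssl/modes.h>

#include "modes_local.h"

/*
 * Sketch: AES-128-GCM encryption producing a full 16-byte tag.
 * Returns 0 on success, -1 on failure (bad key or length overflow).
 */
static int
aes128_gcm_seal(const unsigned char key[16],
    const unsigned char *iv, size_t iv_len,
    const unsigned char *aad, size_t aad_len,
    const unsigned char *pt, unsigned char *ct, size_t len,
    unsigned char tag[16])
{
	GCM128_CONTEXT ctx;
	AES_KEY aes;
	int ret = -1;

	if (AES_set_encrypt_key(key, 128, &aes) != 0)
		return -1;

	/* block128_f and AES_encrypt share the in/out/key calling shape */
	CRYPTO_gcm128_init(&ctx, &aes, (block128_f)AES_encrypt);
	CRYPTO_gcm128_setiv(&ctx, iv, iv_len);

	if (CRYPTO_gcm128_aad(&ctx, aad, aad_len) != 0)
		goto done;
	if (CRYPTO_gcm128_encrypt(&ctx, pt, ct, len) != 0)
		goto done;

	CRYPTO_gcm128_tag(&ctx, tag, 16);
	ret = 0;

 done:
	explicit_bzero(&aes, sizeof(aes));
	explicit_bzero(&ctx, sizeof(ctx));
	return ret;
}

On the decrypt side the same sequence ends with CRYPTO_gcm128_decrypt() followed by CRYPTO_gcm128_finish(ctx, tag, 16), whose return value must be checked before any plaintext is used.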
diff --git a/src/lib/libcrypto/modes/gcm128_amd64.c b/src/lib/libcrypto/modes/gcm128_amd64.c
new file mode 100644
index 0000000000..eaa66fb32f
--- /dev/null
+++ b/src/lib/libcrypto/modes/gcm128_amd64.c
@@ -0,0 +1,44 @@
1/* $OpenBSD: gcm128_amd64.c,v 1.1 2025/06/28 12:39:10 jsing Exp $ */
2/*
3 * Copyright (c) 2025 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include "crypto_arch.h"
19#include "modes_local.h"
20
21void gcm_init_4bit(u128 Htable[16], uint64_t H[2]);
22void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
23void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
24 size_t len);
25
26void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
27void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
28void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
29 size_t len);
30
31void
32gcm128_init(GCM128_CONTEXT *ctx)
33{
34 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_CLMUL) != 0) {
35 gcm_init_clmul(ctx->Htable, ctx->H.u);
36 ctx->gmult = gcm_gmult_clmul;
37 ctx->ghash = gcm_ghash_clmul;
38 return;
39 }
40
41 gcm_init_4bit(ctx->Htable, ctx->H.u);
42 ctx->gmult = gcm_gmult_4bit;
43 ctx->ghash = gcm_ghash_4bit;
44}
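gcm128_init() selects the GHASH implementation once per context: the carry-less multiply routines when the CPU advertises CLMUL, otherwise the generic 4-bit table code. A sketch of how gcm128.c presumably consumes the installed pointers; the gcm_mul()/gcm_ghash() wrapper shapes below are an assumption inferred from the calls visible earlier in the diff, not copied from the source:

#include <stddef.h>
#include <stdint.h>

#include "modes_local.h"

/*
 * Assumed wrappers around the per-context function pointers set up by
 * gcm128_init(): gcm_mul() multiplies the caller's 128-bit value
 * (normally Xi) by H, gcm_ghash() hashes a multiple of 16 bytes of
 * input directly into Xi.
 */
static inline void
gcm_mul(GCM128_CONTEXT *ctx, uint64_t u[2])
{
	ctx->gmult(u, ctx->Htable);
}

static inline void
gcm_ghash(GCM128_CONTEXT *ctx, const uint8_t *in, size_t len)
{
	ctx->ghash(ctx->Xi.u, ctx->Htable, in, len);
}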
diff --git a/src/lib/libcrypto/modes/gcm128_i386.c b/src/lib/libcrypto/modes/gcm128_i386.c
new file mode 100644
index 0000000000..14b0b9ce64
--- /dev/null
+++ b/src/lib/libcrypto/modes/gcm128_i386.c
@@ -0,0 +1,56 @@
1/* $OpenBSD: gcm128_i386.c,v 1.2 2025/12/31 10:16:24 jsing Exp $ */
2/*
3 * Copyright (c) 2025 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include "crypto_arch.h"
19#include "modes_local.h"
20
21void gcm_init_4bit(u128 Htable[16], uint64_t H[2]);
22
23void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
24void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
25 size_t len);
26
27void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
28void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
29 size_t len);
30
31void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
32void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
33void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
34 size_t len);
35
36void
37gcm128_init(GCM128_CONTEXT *ctx)
38{
39 if ((crypto_cpu_caps_i386 & CRYPTO_CPU_CAPS_I386_CLMUL) != 0) {
40 gcm_init_clmul(ctx->Htable, ctx->H.u);
41 ctx->gmult = gcm_gmult_clmul;
42 ctx->ghash = gcm_ghash_clmul;
43 return;
44 }
45
46 if ((crypto_cpu_caps_i386 & CRYPTO_CPU_CAPS_I386_SSE) != 0) {
47 gcm_init_4bit(ctx->Htable, ctx->H.u);
48 ctx->gmult = gcm_gmult_4bit_mmx;
49 ctx->ghash = gcm_ghash_4bit_mmx;
50 return;
51 }
52
53 gcm_init_4bit(ctx->Htable, ctx->H.u);
54 ctx->gmult = gcm_gmult_4bit_x86;
55 ctx->ghash = gcm_ghash_4bit_x86;
56}
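The i386 selection falls back in two steps: CLMUL if available, then the MMX variant when SSE is present, and finally the plain x86 code. For reference, the PCLMULQDQ and SSE feature bits come from CPUID leaf 1; the probe below is illustrative only, is not how libcrypto's crypto_arch capability code is written, and the returned bits are not libcrypto's flag values:

#include <cpuid.h>
#include <stdint.h>

/*
 * Illustrative CPUID probe for the two features the dispatch above
 * cares about.  Bit positions are the architectural definitions:
 * leaf 1, ECX bit 1 = PCLMULQDQ, EDX bit 25 = SSE.
 */
static uint32_t
probe_ghash_features(void)
{
	unsigned int eax, ebx, ecx, edx;
	uint32_t caps = 0;

	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
		return 0;

	if ((ecx & (1U << 1)) != 0)	/* PCLMULQDQ */
		caps |= 1U << 0;
	if ((edx & (1U << 25)) != 0)	/* SSE */
		caps |= 1U << 1;

	return caps;
}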
diff --git a/src/lib/libcrypto/modes/modes_local.h b/src/lib/libcrypto/modes/modes_local.h
index c04db034d0..df699d3e4c 100644
--- a/src/lib/libcrypto/modes/modes_local.h
+++ b/src/lib/libcrypto/modes/modes_local.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: modes_local.h,v 1.4 2025/04/23 14:15:19 jsing Exp $ */ 1/* $OpenBSD: modes_local.h,v 1.8 2025/11/26 10:19:57 tb Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -6,6 +6,9 @@
6 * ==================================================================== 6 * ====================================================================
7 */ 7 */
8 8
9#ifndef HEADER_MODES_LOCAL_H
10#define HEADER_MODES_LOCAL_H
11
9#include <endian.h> 12#include <endian.h>
10 13
11#include <openssl/opensslconf.h> 14#include <openssl/opensslconf.h>
@@ -15,69 +18,51 @@
15__BEGIN_HIDDEN_DECLS 18__BEGIN_HIDDEN_DECLS
16 19
17#if defined(_LP64) 20#if defined(_LP64)
18typedef long i64;
19typedef unsigned long u64;
20#define U64(C) C##UL 21#define U64(C) C##UL
21#else 22#else
22typedef long long i64;
23typedef unsigned long long u64;
24#define U64(C) C##ULL 23#define U64(C) C##ULL
25#endif 24#endif
26 25
27typedef unsigned int u32;
28typedef unsigned char u8;
29
30/* GCM definitions */ 26/* GCM definitions */
31 27
32typedef struct { 28typedef struct {
33 u64 hi, lo; 29 uint64_t hi, lo;
34} u128; 30} u128;
35 31
36#ifdef TABLE_BITS
37#undef TABLE_BITS
38#endif
39/*
40 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
41 * never be set to 8 [or 1]. For further information see gcm128.c.
42 */
43#define TABLE_BITS 4
44
45struct gcm128_context { 32struct gcm128_context {
46 /* Following 6 names follow names in GCM specification */ 33 /* Following 6 names follow names in GCM specification */
47 union { 34 union {
48 u64 u[2]; 35 uint64_t u[2];
49 u32 d[4]; 36 uint32_t d[4];
50 u8 c[16]; 37 uint8_t c[16];
51 size_t t[16/sizeof(size_t)]; 38 size_t t[16/sizeof(size_t)];
52 } Yi, EKi, EK0, len, Xi, H; 39 } Yi, EKi, EK0, len, Xi, H;
53 /* Relative position of Xi, H and pre-computed Htable is used 40 /* Relative position of Xi, H and pre-computed Htable is used
54 * in some assembler modules, i.e. don't change the order! */ 41 * in some assembler modules, i.e. don't change the order! */
55#if TABLE_BITS==8
56 u128 Htable[256];
57#else
58 u128 Htable[16]; 42 u128 Htable[16];
59 void (*gmult)(u64 Xi[2], const u128 Htable[16]); 43 void (*gmult)(uint64_t Xi[2], const u128 Htable[16]);
60 void (*ghash)(u64 Xi[2], const u128 Htable[16], const u8 *inp, 44 void (*ghash)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
61 size_t len); 45 size_t len);
62#endif
63 unsigned int mres, ares; 46 unsigned int mres, ares;
64 block128_f block; 47 block128_f block;
65 void *key; 48 void *key;
66}; 49};
67 50
68struct xts128_context { 51struct xts128_context {
69 void *key1, *key2; 52 const void *key1, *key2;
70 block128_f block1, block2; 53 block128_f block1, block2;
71}; 54};
72 55
73struct ccm128_context { 56struct ccm128_context {
74 union { 57 union {
75 u64 u[2]; 58 uint64_t u[2];
76 u8 c[16]; 59 uint8_t c[16];
77 } nonce, cmac; 60 } nonce, cmac;
78 u64 blocks; 61 uint64_t blocks;
79 block128_f block; 62 block128_f block;
80 void *key; 63 void *key;
81}; 64};
82 65
83__END_HIDDEN_DECLS 66__END_HIDDEN_DECLS
67
68#endif /* HEADER_MODES_LOCAL_H */
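The 16-byte unions in gcm128_context let the same block be addressed as two 64-bit words, four 32-bit words or raw bytes, which is what makes expressions like ctx->Yi.d[3] = htobe32(ctr) work: only bytes 12..15 of the counter block change. A small self-contained illustration of that layout; the union is redeclared locally here purely for the example:

#include <endian.h>
#include <stdint.h>

/*
 * Local copy of the block layout used by Yi/EKi/EK0/Xi/H: all views
 * alias the same 16 bytes, so writing d[3] updates bytes 12..15 of c[]
 * (the 32-bit big-endian block counter in GCM) and nothing else.
 */
union gcm_block {
	uint64_t u[2];
	uint32_t d[4];
	uint8_t c[16];
};

static void
set_ctr32(union gcm_block *Yi, uint32_t ctr)
{
	Yi->d[3] = htobe32(ctr);
}

static uint32_t
get_ctr32(const union gcm_block *Yi)
{
	return be32toh(Yi->d[3]);
}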
diff --git a/src/lib/libcrypto/modes/xts128.c b/src/lib/libcrypto/modes/xts128.c
index 789af9ef65..9c863e73d6 100644
--- a/src/lib/libcrypto/modes/xts128.c
+++ b/src/lib/libcrypto/modes/xts128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: xts128.c,v 1.14 2025/04/21 16:01:18 jsing Exp $ */ 1/* $OpenBSD: xts128.c,v 1.15 2025/05/18 09:05:59 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -61,9 +61,9 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
61 size_t len, int enc) 61 size_t len, int enc)
62{ 62{
63 union { 63 union {
64 u64 u[2]; 64 uint64_t u[2];
65 u32 d[4]; 65 uint32_t d[4];
66 u8 c[16]; 66 uint8_t c[16];
67 } tweak, scratch; 67 } tweak, scratch;
68 unsigned int i; 68 unsigned int i;
69 69
@@ -83,8 +83,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
83 scratch.u[0] ^= tweak.u[0]; 83 scratch.u[0] ^= tweak.u[0];
84 scratch.u[1] ^= tweak.u[1]; 84 scratch.u[1] ^= tweak.u[1];
85#else 85#else
86 scratch.u[0] = ((u64 *)inp)[0] ^ tweak.u[0]; 86 scratch.u[0] = ((uint64_t *)inp)[0] ^ tweak.u[0];
87 scratch.u[1] = ((u64 *)inp)[1] ^ tweak.u[1]; 87 scratch.u[1] = ((uint64_t *)inp)[1] ^ tweak.u[1];
88#endif 88#endif
89 (*ctx->block1)(scratch.c, scratch.c, ctx->key1); 89 (*ctx->block1)(scratch.c, scratch.c, ctx->key1);
90#ifdef __STRICT_ALIGNMENT 90#ifdef __STRICT_ALIGNMENT
@@ -92,8 +92,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
92 scratch.u[1] ^= tweak.u[1]; 92 scratch.u[1] ^= tweak.u[1];
93 memcpy(out, scratch.c, 16); 93 memcpy(out, scratch.c, 16);
94#else 94#else
95 ((u64 *)out)[0] = scratch.u[0] ^= tweak.u[0]; 95 ((uint64_t *)out)[0] = scratch.u[0] ^= tweak.u[0];
96 ((u64 *)out)[1] = scratch.u[1] ^= tweak.u[1]; 96 ((uint64_t *)out)[1] = scratch.u[1] ^= tweak.u[1];
97#endif 97#endif
98 inp += 16; 98 inp += 16;
99 out += 16; 99 out += 16;
@@ -115,15 +115,15 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
115 for (c = 0, i = 0; i < 16; ++i) { 115 for (c = 0, i = 0; i < 16; ++i) {
116 /*+ substitutes for |, because c is 1 bit */ 116 /*+ substitutes for |, because c is 1 bit */
117 c += ((size_t)tweak.c[i]) << 1; 117 c += ((size_t)tweak.c[i]) << 1;
118 tweak.c[i] = (u8)c; 118 tweak.c[i] = (uint8_t)c;
119 c = c >> 8; 119 c = c >> 8;
120 } 120 }
121 tweak.c[0] ^= (u8)(0x87 & (0 - c)); 121 tweak.c[0] ^= (uint8_t)(0x87 & (0 - c));
122#endif 122#endif
123 } 123 }
124 if (enc) { 124 if (enc) {
125 for (i = 0; i < len; ++i) { 125 for (i = 0; i < len; ++i) {
126 u8 ch = inp[i]; 126 uint8_t ch = inp[i];
127 out[i] = scratch.c[i]; 127 out[i] = scratch.c[i];
128 scratch.c[i] = ch; 128 scratch.c[i] = ch;
129 } 129 }
@@ -135,8 +135,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
135 memcpy(out - 16, scratch.c, 16); 135 memcpy(out - 16, scratch.c, 16);
136 } else { 136 } else {
137 union { 137 union {
138 u64 u[2]; 138 uint64_t u[2];
139 u8 c[16]; 139 uint8_t c[16];
140 } tweak1; 140 } tweak1;
141 141
142#if BYTE_ORDER == LITTLE_ENDIAN 142#if BYTE_ORDER == LITTLE_ENDIAN
@@ -152,25 +152,25 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
152 for (c = 0, i = 0; i < 16; ++i) { 152 for (c = 0, i = 0; i < 16; ++i) {
153 /*+ substitutes for |, because c is 1 bit */ 153 /*+ substitutes for |, because c is 1 bit */
154 c += ((size_t)tweak.c[i]) << 1; 154 c += ((size_t)tweak.c[i]) << 1;
155 tweak1.c[i] = (u8)c; 155 tweak1.c[i] = (uint8_t)c;
156 c = c >> 8; 156 c = c >> 8;
157 } 157 }
158 tweak1.c[0] ^= (u8)(0x87 & (0 - c)); 158 tweak1.c[0] ^= (uint8_t)(0x87 & (0 - c));
159#endif 159#endif
160#ifdef __STRICT_ALIGNMENT 160#ifdef __STRICT_ALIGNMENT
161 memcpy(scratch.c, inp, 16); 161 memcpy(scratch.c, inp, 16);
162 scratch.u[0] ^= tweak1.u[0]; 162 scratch.u[0] ^= tweak1.u[0];
163 scratch.u[1] ^= tweak1.u[1]; 163 scratch.u[1] ^= tweak1.u[1];
164#else 164#else
165 scratch.u[0] = ((u64 *)inp)[0] ^ tweak1.u[0]; 165 scratch.u[0] = ((uint64_t *)inp)[0] ^ tweak1.u[0];
166 scratch.u[1] = ((u64 *)inp)[1] ^ tweak1.u[1]; 166 scratch.u[1] = ((uint64_t *)inp)[1] ^ tweak1.u[1];
167#endif 167#endif
168 (*ctx->block1)(scratch.c, scratch.c, ctx->key1); 168 (*ctx->block1)(scratch.c, scratch.c, ctx->key1);
169 scratch.u[0] ^= tweak1.u[0]; 169 scratch.u[0] ^= tweak1.u[0];
170 scratch.u[1] ^= tweak1.u[1]; 170 scratch.u[1] ^= tweak1.u[1];
171 171
172 for (i = 0; i < len; ++i) { 172 for (i = 0; i < len; ++i) {
173 u8 ch = inp[16 + i]; 173 uint8_t ch = inp[16 + i];
174 out[16 + i] = scratch.c[i]; 174 out[16 + i] = scratch.c[i];
175 scratch.c[i] = ch; 175 scratch.c[i] = ch;
176 } 176 }
@@ -182,8 +182,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
182 scratch.u[1] ^= tweak.u[1]; 182 scratch.u[1] ^= tweak.u[1];
183 memcpy(out, scratch.c, 16); 183 memcpy(out, scratch.c, 16);
184#else 184#else
185 ((u64 *)out)[0] = scratch.u[0] ^ tweak.u[0]; 185 ((uint64_t *)out)[0] = scratch.u[0] ^ tweak.u[0];
186 ((u64 *)out)[1] = scratch.u[1] ^ tweak.u[1]; 186 ((uint64_t *)out)[1] = scratch.u[1] ^ tweak.u[1];
187#endif 187#endif
188 } 188 }
189 189
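The strict-alignment branches above advance the XTS tweak by multiplying it by x in GF(2^128): each byte is shifted left by one bit with carry propagation, and an overflow out of the top byte is reduced by XORing 0x87 (x^7 + x^2 + x + 1) back into byte 0, the least significant byte in IEEE P1619's ordering. A standalone version of that doubling step, with an illustrative helper name:

#include <stdint.h>

/*
 * Multiply a 128-bit XTS tweak by x in GF(2^128).  The tweak is stored
 * least significant byte first; a carry out of tweak[15] is folded
 * back into tweak[0] as 0x87, the low byte of the reduction polynomial
 * x^128 + x^7 + x^2 + x + 1.
 */
static void
xts_double_tweak(uint8_t tweak[16])
{
	unsigned int carry = 0, i;

	for (i = 0; i < 16; i++) {
		carry += (unsigned int)tweak[i] << 1;
		tweak[i] = (uint8_t)carry;
		carry >>= 8;
	}
	tweak[0] ^= (uint8_t)(0x87 & (0 - carry));
}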