path: root/src/lib/libcrypto/modes
Diffstat
-rw-r--r--  src/lib/libcrypto/modes/asm/ghash-x86.pl     3
-rw-r--r--  src/lib/libcrypto/modes/cbc128.c            16
-rw-r--r--  src/lib/libcrypto/modes/ccm128.c           100
-rw-r--r--  src/lib/libcrypto/modes/cfb128.c            16
-rw-r--r--  src/lib/libcrypto/modes/ctr128.c            48
-rw-r--r--  src/lib/libcrypto/modes/gcm128.c          1129
-rw-r--r--  src/lib/libcrypto/modes/gcm128_amd64.c      44
-rw-r--r--  src/lib/libcrypto/modes/gcm128_i386.c       56
-rw-r--r--  src/lib/libcrypto/modes/modes_local.h       80
-rw-r--r--  src/lib/libcrypto/modes/ofb128.c            14
-rw-r--r--  src/lib/libcrypto/modes/xts128.c            51
11 files changed, 390 insertions(+), 1167 deletions(-)
diff --git a/src/lib/libcrypto/modes/asm/ghash-x86.pl b/src/lib/libcrypto/modes/asm/ghash-x86.pl
index 47833582b6..395c680cc5 100644
--- a/src/lib/libcrypto/modes/asm/ghash-x86.pl
+++ b/src/lib/libcrypto/modes/asm/ghash-x86.pl
@@ -119,8 +119,7 @@ require "x86asm.pl";
119 119
120&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); 120&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");
121 121
122$sse2=0; 122$sse2=1;
123for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
124 123
125($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx"); 124($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx");
126$inp = "edi"; 125$inp = "edi";
diff --git a/src/lib/libcrypto/modes/cbc128.c b/src/lib/libcrypto/modes/cbc128.c
index f8ebf79a87..1b6858ee25 100644
--- a/src/lib/libcrypto/modes/cbc128.c
+++ b/src/lib/libcrypto/modes/cbc128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: cbc128.c,v 1.8 2023/07/08 14:56:54 beck Exp $ */ 1/* $OpenBSD: cbc128.c,v 1.11 2025/04/23 10:09:08 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -49,15 +49,11 @@
49 * 49 *
50 */ 50 */
51 51
52#include <openssl/crypto.h>
53#include "modes_local.h"
54#include <string.h> 52#include <string.h>
55 53
56#ifndef MODES_DEBUG 54#include <openssl/crypto.h>
57# ifndef NDEBUG 55
58# define NDEBUG 56#include "modes_local.h"
59# endif
60#endif
61 57
62#undef STRICT_ALIGNMENT 58#undef STRICT_ALIGNMENT
63#ifdef __STRICT_ALIGNMENT 59#ifdef __STRICT_ALIGNMENT
@@ -74,7 +70,6 @@ CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
74 size_t n; 70 size_t n;
75 const unsigned char *iv = ivec; 71 const unsigned char *iv = ivec;
76 72
77#if !defined(OPENSSL_SMALL_FOOTPRINT)
78 if (STRICT_ALIGNMENT && 73 if (STRICT_ALIGNMENT &&
79 ((size_t)in|(size_t)out|(size_t)ivec) % sizeof(size_t) != 0) { 74 ((size_t)in|(size_t)out|(size_t)ivec) % sizeof(size_t) != 0) {
80 while (len >= 16) { 75 while (len >= 16) {
@@ -98,7 +93,6 @@ CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
98 out += 16; 93 out += 16;
99 } 94 }
100 } 95 }
101#endif
102 while (len) { 96 while (len) {
103 for (n = 0; n < 16 && n < len; ++n) 97 for (n = 0; n < 16 && n < len; ++n)
104 out[n] = in[n] ^ iv[n]; 98 out[n] = in[n] ^ iv[n];
@@ -127,7 +121,6 @@ CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
127 unsigned char c[16]; 121 unsigned char c[16];
128 } tmp; 122 } tmp;
129 123
130#if !defined(OPENSSL_SMALL_FOOTPRINT)
131 if (in != out) { 124 if (in != out) {
132 const unsigned char *iv = ivec; 125 const unsigned char *iv = ivec;
133 126
@@ -192,7 +185,6 @@ CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
192 } 185 }
193 } 186 }
194 } 187 }
195#endif
196 while (len) { 188 while (len) {
197 unsigned char c; 189 unsigned char c;
198 (*block)(in, tmp.c, key); 190 (*block)(in, tmp.c, key);
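The cbc128.c hunks above only reshuffle includes, drop the MODES_DEBUG/NDEBUG dance and remove the OPENSSL_SMALL_FOOTPRINT guards; the public entry point is unchanged. For orientation, a minimal caller sketch (an illustration, not part of this diff), wiring AES from <openssl/aes.h> into CRYPTO_cbc128_encrypt with the usual block128_f cast:

#include <openssl/aes.h>
#include <openssl/modes.h>

int
main(void)
{
	static const unsigned char key[16] = "0123456789abcdef";
	unsigned char iv[16] = { 0 };	/* updated in place across calls */
	unsigned char in[32] = "two 16-byte blocks of plaintext";
	unsigned char out[32];
	AES_KEY aes;

	AES_set_encrypt_key(key, 128, &aes);
	/* len is a byte count; here a multiple of the 16-byte block size. */
	CRYPTO_cbc128_encrypt(in, out, sizeof(in), &aes, iv,
	    (block128_f)AES_encrypt);
	return 0;
}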
diff --git a/src/lib/libcrypto/modes/ccm128.c b/src/lib/libcrypto/modes/ccm128.c
index 68c5cce5da..e27681ee62 100644
--- a/src/lib/libcrypto/modes/ccm128.c
+++ b/src/lib/libcrypto/modes/ccm128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: ccm128.c,v 1.8 2023/07/08 14:56:54 beck Exp $ */ 1/* $OpenBSD: ccm128.c,v 1.12 2025/05/18 09:21:29 bcook Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -48,15 +48,11 @@
48 * ==================================================================== 48 * ====================================================================
49 */ 49 */
50 50
51#include <openssl/crypto.h>
52#include "modes_local.h"
53#include <string.h> 51#include <string.h>
54 52
55#ifndef MODES_DEBUG 53#include <openssl/crypto.h>
56# ifndef NDEBUG 54
57# define NDEBUG 55#include "modes_local.h"
58# endif
59#endif
60 56
61/* First you setup M and L parameters and pass the key schedule. 57/* First you setup M and L parameters and pass the key schedule.
62 * This is called once per session setup... */ 58 * This is called once per session setup... */
@@ -65,7 +61,7 @@ CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
65 unsigned int M, unsigned int L, void *key, block128_f block) 61 unsigned int M, unsigned int L, void *key, block128_f block)
66{ 62{
67 memset(ctx->nonce.c, 0, sizeof(ctx->nonce.c)); 63 memset(ctx->nonce.c, 0, sizeof(ctx->nonce.c));
68 ctx->nonce.c[0] = ((u8)(L - 1) & 7) | (u8)(((M - 2)/2) & 7) << 3; 64 ctx->nonce.c[0] = ((uint8_t)(L - 1) & 7) | (uint8_t)(((M - 2)/2) & 7) << 3;
69 ctx->blocks = 0; 65 ctx->blocks = 0;
70 ctx->block = block; 66 ctx->block = block;
71 ctx->key = key; 67 ctx->key = key;
@@ -85,17 +81,17 @@ CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx,
85 return -1; /* nonce is too short */ 81 return -1; /* nonce is too short */
86 82
87 if (sizeof(mlen) == 8 && L >= 3) { 83 if (sizeof(mlen) == 8 && L >= 3) {
88 ctx->nonce.c[8] = (u8)(mlen >> (56 % (sizeof(mlen)*8))); 84 ctx->nonce.c[8] = (uint8_t)(mlen >> (56 % (sizeof(mlen)*8)));
89 ctx->nonce.c[9] = (u8)(mlen >> (48 % (sizeof(mlen)*8))); 85 ctx->nonce.c[9] = (uint8_t)(mlen >> (48 % (sizeof(mlen)*8)));
90 ctx->nonce.c[10] = (u8)(mlen >> (40 % (sizeof(mlen)*8))); 86 ctx->nonce.c[10] = (uint8_t)(mlen >> (40 % (sizeof(mlen)*8)));
91 ctx->nonce.c[11] = (u8)(mlen >> (32 % (sizeof(mlen)*8))); 87 ctx->nonce.c[11] = (uint8_t)(mlen >> (32 % (sizeof(mlen)*8)));
92 } else 88 } else
93 ctx->nonce.u[1] = 0; 89 ctx->nonce.u[1] = 0;
94 90
95 ctx->nonce.c[12] = (u8)(mlen >> 24); 91 ctx->nonce.c[12] = (uint8_t)(mlen >> 24);
96 ctx->nonce.c[13] = (u8)(mlen >> 16); 92 ctx->nonce.c[13] = (uint8_t)(mlen >> 16);
97 ctx->nonce.c[14] = (u8)(mlen >> 8); 93 ctx->nonce.c[14] = (uint8_t)(mlen >> 8);
98 ctx->nonce.c[15] = (u8)mlen; 94 ctx->nonce.c[15] = (uint8_t)mlen;
99 95
100 ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */ 96 ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */
101 memcpy(&ctx->nonce.c[1], nonce, 14 - L); 97 memcpy(&ctx->nonce.c[1], nonce, 14 - L);
@@ -120,29 +116,29 @@ CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx,
120 ctx->blocks++; 116 ctx->blocks++;
121 117
122 if (alen < (0x10000 - 0x100)) { 118 if (alen < (0x10000 - 0x100)) {
123 ctx->cmac.c[0] ^= (u8)(alen >> 8); 119 ctx->cmac.c[0] ^= (uint8_t)(alen >> 8);
124 ctx->cmac.c[1] ^= (u8)alen; 120 ctx->cmac.c[1] ^= (uint8_t)alen;
125 i = 2; 121 i = 2;
126 } else if (sizeof(alen) == 8 && 122 } else if (sizeof(alen) == 8 &&
127 alen >= (size_t)1 << (32 % (sizeof(alen)*8))) { 123 alen >= (size_t)1 << (32 % (sizeof(alen)*8))) {
128 ctx->cmac.c[0] ^= 0xFF; 124 ctx->cmac.c[0] ^= 0xFF;
129 ctx->cmac.c[1] ^= 0xFF; 125 ctx->cmac.c[1] ^= 0xFF;
130 ctx->cmac.c[2] ^= (u8)(alen >> (56 % (sizeof(alen)*8))); 126 ctx->cmac.c[2] ^= (uint8_t)(alen >> (56 % (sizeof(alen)*8)));
131 ctx->cmac.c[3] ^= (u8)(alen >> (48 % (sizeof(alen)*8))); 127 ctx->cmac.c[3] ^= (uint8_t)(alen >> (48 % (sizeof(alen)*8)));
132 ctx->cmac.c[4] ^= (u8)(alen >> (40 % (sizeof(alen)*8))); 128 ctx->cmac.c[4] ^= (uint8_t)(alen >> (40 % (sizeof(alen)*8)));
133 ctx->cmac.c[5] ^= (u8)(alen >> (32 % (sizeof(alen)*8))); 129 ctx->cmac.c[5] ^= (uint8_t)(alen >> (32 % (sizeof(alen)*8)));
134 ctx->cmac.c[6] ^= (u8)(alen >> 24); 130 ctx->cmac.c[6] ^= (uint8_t)(alen >> 24);
135 ctx->cmac.c[7] ^= (u8)(alen >> 16); 131 ctx->cmac.c[7] ^= (uint8_t)(alen >> 16);
136 ctx->cmac.c[8] ^= (u8)(alen >> 8); 132 ctx->cmac.c[8] ^= (uint8_t)(alen >> 8);
137 ctx->cmac.c[9] ^= (u8)alen; 133 ctx->cmac.c[9] ^= (uint8_t)alen;
138 i = 10; 134 i = 10;
139 } else { 135 } else {
140 ctx->cmac.c[0] ^= 0xFF; 136 ctx->cmac.c[0] ^= 0xFF;
141 ctx->cmac.c[1] ^= 0xFE; 137 ctx->cmac.c[1] ^= 0xFE;
142 ctx->cmac.c[2] ^= (u8)(alen >> 24); 138 ctx->cmac.c[2] ^= (uint8_t)(alen >> 24);
143 ctx->cmac.c[3] ^= (u8)(alen >> 16); 139 ctx->cmac.c[3] ^= (uint8_t)(alen >> 16);
144 ctx->cmac.c[4] ^= (u8)(alen >> 8); 140 ctx->cmac.c[4] ^= (uint8_t)(alen >> 8);
145 ctx->cmac.c[5] ^= (u8)alen; 141 ctx->cmac.c[5] ^= (uint8_t)alen;
146 i = 6; 142 i = 6;
147 } 143 }
148 144
@@ -164,7 +160,7 @@ static void
164ctr64_inc(unsigned char *counter) 160ctr64_inc(unsigned char *counter)
165{ 161{
166 unsigned int n = 8; 162 unsigned int n = 8;
167 u8 c; 163 uint8_t c;
168 164
169 counter += 8; 165 counter += 8;
170 do { 166 do {
@@ -188,8 +184,8 @@ CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
188 block128_f block = ctx->block; 184 block128_f block = ctx->block;
189 void *key = ctx->key; 185 void *key = ctx->key;
190 union { 186 union {
191 u64 u[2]; 187 uint64_t u[2];
192 u8 c[16]; 188 uint8_t c[16];
193 } scratch; 189 } scratch;
194 190
195 if (!(flags0 & 0x40)) 191 if (!(flags0 & 0x40))
@@ -215,16 +211,16 @@ CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
215 while (len >= 16) { 211 while (len >= 16) {
216#ifdef __STRICT_ALIGNMENT 212#ifdef __STRICT_ALIGNMENT
217 union { 213 union {
218 u64 u[2]; 214 uint64_t u[2];
219 u8 c[16]; 215 uint8_t c[16];
220 } temp; 216 } temp;
221 217
222 memcpy(temp.c, inp, 16); 218 memcpy(temp.c, inp, 16);
223 ctx->cmac.u[0] ^= temp.u[0]; 219 ctx->cmac.u[0] ^= temp.u[0];
224 ctx->cmac.u[1] ^= temp.u[1]; 220 ctx->cmac.u[1] ^= temp.u[1];
225#else 221#else
226 ctx->cmac.u[0] ^= ((u64 *)inp)[0]; 222 ctx->cmac.u[0] ^= ((uint64_t *)inp)[0];
227 ctx->cmac.u[1] ^= ((u64 *)inp)[1]; 223 ctx->cmac.u[1] ^= ((uint64_t *)inp)[1];
228#endif 224#endif
229 (*block)(ctx->cmac.c, ctx->cmac.c, key); 225 (*block)(ctx->cmac.c, ctx->cmac.c, key);
230 (*block)(ctx->nonce.c, scratch.c, key); 226 (*block)(ctx->nonce.c, scratch.c, key);
@@ -234,8 +230,8 @@ CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
234 temp.u[1] ^= scratch.u[1]; 230 temp.u[1] ^= scratch.u[1];
235 memcpy(out, temp.c, 16); 231 memcpy(out, temp.c, 16);
236#else 232#else
237 ((u64 *)out)[0] = scratch.u[0] ^ ((u64 *)inp)[0]; 233 ((uint64_t *)out)[0] = scratch.u[0] ^ ((uint64_t *)inp)[0];
238 ((u64 *)out)[1] = scratch.u[1] ^ ((u64 *)inp)[1]; 234 ((uint64_t *)out)[1] = scratch.u[1] ^ ((uint64_t *)inp)[1];
239#endif 235#endif
240 inp += 16; 236 inp += 16;
241 out += 16; 237 out += 16;
@@ -275,8 +271,8 @@ CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
275 block128_f block = ctx->block; 271 block128_f block = ctx->block;
276 void *key = ctx->key; 272 void *key = ctx->key;
277 union { 273 union {
278 u64 u[2]; 274 uint64_t u[2];
279 u8 c[16]; 275 uint8_t c[16];
280 } scratch; 276 } scratch;
281 277
282 if (!(flags0 & 0x40)) 278 if (!(flags0 & 0x40))
@@ -297,8 +293,8 @@ CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
297 while (len >= 16) { 293 while (len >= 16) {
298#ifdef __STRICT_ALIGNMENT 294#ifdef __STRICT_ALIGNMENT
299 union { 295 union {
300 u64 u[2]; 296 uint64_t u[2];
301 u8 c[16]; 297 uint8_t c[16];
302 } temp; 298 } temp;
303#endif 299#endif
304 (*block)(ctx->nonce.c, scratch.c, key); 300 (*block)(ctx->nonce.c, scratch.c, key);
@@ -309,10 +305,10 @@ CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
309 ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]); 305 ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]);
310 memcpy(out, scratch.c, 16); 306 memcpy(out, scratch.c, 16);
311#else 307#else
312 ctx->cmac.u[0] ^= (((u64 *)out)[0] = scratch.u[0] ^ 308 ctx->cmac.u[0] ^= (((uint64_t *)out)[0] = scratch.u[0] ^
313 ((u64 *)inp)[0]); 309 ((uint64_t *)inp)[0]);
314 ctx->cmac.u[1] ^= (((u64 *)out)[1] = scratch.u[1] ^ 310 ctx->cmac.u[1] ^= (((uint64_t *)out)[1] = scratch.u[1] ^
315 ((u64 *)inp)[1]); 311 ((uint64_t *)inp)[1]);
316#endif 312#endif
317 (*block)(ctx->cmac.c, ctx->cmac.c, key); 313 (*block)(ctx->cmac.c, ctx->cmac.c, key);
318 314
@@ -367,8 +363,8 @@ CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx,
367 block128_f block = ctx->block; 363 block128_f block = ctx->block;
368 void *key = ctx->key; 364 void *key = ctx->key;
369 union { 365 union {
370 u64 u[2]; 366 uint64_t u[2];
371 u8 c[16]; 367 uint8_t c[16];
372 } scratch; 368 } scratch;
373 369
374 if (!(flags0 & 0x40)) 370 if (!(flags0 & 0x40))
@@ -434,8 +430,8 @@ CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx,
434 block128_f block = ctx->block; 430 block128_f block = ctx->block;
435 void *key = ctx->key; 431 void *key = ctx->key;
436 union { 432 union {
437 u64 u[2]; 433 uint64_t u[2];
438 u8 c[16]; 434 uint8_t c[16];
439 } scratch; 435 } scratch;
440 436
441 if (!(flags0 & 0x40)) 437 if (!(flags0 & 0x40))
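Beyond the mechanical u8/u64 to uint8_t/uint64_t conversion, the first ccm128.c hunk shows how CRYPTO_ccm128_init packs the CCM parameters into the leading nonce octet per RFC 3610: the low three bits hold L - 1 and bits 3..5 hold (M - 2)/2. A standalone check of that encoding (values chosen only for illustration):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	unsigned int M = 16;	/* tag length in bytes */
	unsigned int L = 8;	/* length-field width in bytes */
	uint8_t flags;

	/* Same expression CRYPTO_ccm128_init uses for nonce.c[0]. */
	flags = ((uint8_t)(L - 1) & 7) | (uint8_t)(((M - 2) / 2) & 7) << 3;

	printf("flags = 0x%02x\n", flags);	/* 0x07 | 0x38 = 0x3f */
	return 0;
}

Bit 6 (0x40) is the Adata flag: CRYPTO_ccm128_setiv clears it (the `&= ~0x40` line above) and it is only set once associated data is actually supplied.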
diff --git a/src/lib/libcrypto/modes/cfb128.c b/src/lib/libcrypto/modes/cfb128.c
index 931353a620..9a63a46724 100644
--- a/src/lib/libcrypto/modes/cfb128.c
+++ b/src/lib/libcrypto/modes/cfb128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: cfb128.c,v 1.7 2023/07/08 14:56:54 beck Exp $ */ 1/* $OpenBSD: cfb128.c,v 1.10 2025/04/23 10:09:08 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -49,15 +49,11 @@
49 * 49 *
50 */ 50 */
51 51
52#include <openssl/crypto.h>
53#include "modes_local.h"
54#include <string.h> 52#include <string.h>
55 53
56#ifndef MODES_DEBUG 54#include <openssl/crypto.h>
57# ifndef NDEBUG 55
58# define NDEBUG 56#include "modes_local.h"
59# endif
60#endif
61 57
62/* The input and output encrypted as though 128bit cfb mode is being 58/* The input and output encrypted as though 128bit cfb mode is being
63 * used. The extra state information to record how much of the 59 * used. The extra state information to record how much of the
@@ -75,7 +71,6 @@ CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
75 n = *num; 71 n = *num;
76 72
77 if (enc) { 73 if (enc) {
78#if !defined(OPENSSL_SMALL_FOOTPRINT)
79 if (16 % sizeof(size_t) == 0) 74 if (16 % sizeof(size_t) == 0)
80 do { /* always true actually */ 75 do { /* always true actually */
81 while (n && len) { 76 while (n && len) {
@@ -111,7 +106,6 @@ CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
111 return; 106 return;
112 } while (0); 107 } while (0);
113 /* the rest would be commonly eliminated by x86* compiler */ 108 /* the rest would be commonly eliminated by x86* compiler */
114#endif
115 while (l < len) { 109 while (l < len) {
116 if (n == 0) { 110 if (n == 0) {
117 (*block)(ivec, ivec, key); 111 (*block)(ivec, ivec, key);
@@ -122,7 +116,6 @@ CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
122 } 116 }
123 *num = n; 117 *num = n;
124 } else { 118 } else {
125#if !defined(OPENSSL_SMALL_FOOTPRINT)
126 if (16 % sizeof(size_t) == 0) 119 if (16 % sizeof(size_t) == 0)
127 do { /* always true actually */ 120 do { /* always true actually */
128 while (n && len) { 121 while (n && len) {
@@ -163,7 +156,6 @@ CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
163 return; 156 return;
164 } while (0); 157 } while (0);
165 /* the rest would be commonly eliminated by x86* compiler */ 158 /* the rest would be commonly eliminated by x86* compiler */
166#endif
167 while (l < len) { 159 while (l < len) {
168 unsigned char c; 160 unsigned char c;
169 if (n == 0) { 161 if (n == 0) {
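cfb128.c gets the same include reordering and small-footprint cleanup. The *num parameter that the surviving code threads through is what makes the API restartable: the partially consumed keystream block lives in ivec and *num, so data can be fed in arbitrary-sized pieces. A minimal sketch of that, again assuming AES (chunk sizes are arbitrary):

#include <openssl/aes.h>
#include <openssl/modes.h>

int
main(void)
{
	static const unsigned char key[16] = "0123456789abcdef";
	unsigned char iv[16] = { 0 };
	unsigned char msg[23] = "split across two calls!";
	unsigned char ct[23];
	AES_KEY aes;
	int num = 0;	/* bytes of the current keystream block consumed */

	AES_set_encrypt_key(key, 128, &aes);
	/* Two calls produce the same ciphertext as one 23-byte call,
	 * because iv and num carry the mode state between them. */
	CRYPTO_cfb128_encrypt(msg, ct, 5, &aes, iv, &num, 1,
	    (block128_f)AES_encrypt);
	CRYPTO_cfb128_encrypt(msg + 5, ct + 5, 18, &aes, iv, &num, 1,
	    (block128_f)AES_encrypt);
	return 0;
}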
diff --git a/src/lib/libcrypto/modes/ctr128.c b/src/lib/libcrypto/modes/ctr128.c
index 6d507dfc3a..87d9abb355 100644
--- a/src/lib/libcrypto/modes/ctr128.c
+++ b/src/lib/libcrypto/modes/ctr128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: ctr128.c,v 1.11 2023/07/08 14:56:54 beck Exp $ */ 1/* $OpenBSD: ctr128.c,v 1.18 2025/05/18 09:05:59 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -49,16 +49,12 @@
49 * 49 *
50 */ 50 */
51 51
52#include <openssl/crypto.h>
53#include "modes_local.h"
54#include <string.h> 52#include <string.h>
55 53
56#ifndef MODES_DEBUG 54#include <openssl/crypto.h>
57# ifndef NDEBUG 55
58# define NDEBUG 56#include "crypto_internal.h"
59# endif 57#include "modes_local.h"
60#endif
61#include <assert.h>
62 58
63/* NOTE: the IV/counter CTR mode is big-endian. The code itself 59/* NOTE: the IV/counter CTR mode is big-endian. The code itself
64 * is endian-neutral. */ 60 * is endian-neutral. */
@@ -67,8 +63,8 @@
67static void 63static void
68ctr128_inc(unsigned char *counter) 64ctr128_inc(unsigned char *counter)
69{ 65{
70 u32 n = 16; 66 uint32_t n = 16;
71 u8 c; 67 uint8_t c;
72 68
73 do { 69 do {
74 --n; 70 --n;
@@ -80,7 +76,6 @@ ctr128_inc(unsigned char *counter)
80 } while (n); 76 } while (n);
81} 77}
82 78
83#if !defined(OPENSSL_SMALL_FOOTPRINT)
84static void 79static void
85ctr128_inc_aligned(unsigned char *counter) 80ctr128_inc_aligned(unsigned char *counter)
86{ 81{
@@ -100,7 +95,6 @@ ctr128_inc_aligned(unsigned char *counter)
100 } while (n); 95 } while (n);
101#endif 96#endif
102} 97}
103#endif
104 98
105/* The input encrypted as though 128bit counter mode is being 99/* The input encrypted as though 128bit counter mode is being
106 * used. The extra state information to record how much of the 100 * used. The extra state information to record how much of the
@@ -121,14 +115,11 @@ CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
121 unsigned char ivec[16], unsigned char ecount_buf[16], 115 unsigned char ivec[16], unsigned char ecount_buf[16],
122 unsigned int *num, block128_f block) 116 unsigned int *num, block128_f block)
123{ 117{
124 unsigned int n; 118 unsigned int n = *num;
125 size_t l = 0; 119 size_t l = 0;
126 120
127 assert(*num < 16); 121 OPENSSL_assert(n < 16);
128
129 n = *num;
130 122
131#if !defined(OPENSSL_SMALL_FOOTPRINT)
132 if (16 % sizeof(size_t) == 0) 123 if (16 % sizeof(size_t) == 0)
133 do { /* always true actually */ 124 do { /* always true actually */
134 while (n && len) { 125 while (n && len) {
@@ -166,7 +157,6 @@ CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
166 return; 157 return;
167 } while (0); 158 } while (0);
168 /* the rest would be commonly eliminated by x86* compiler */ 159 /* the rest would be commonly eliminated by x86* compiler */
169#endif
170 while (l < len) { 160 while (l < len) {
171 if (n == 0) { 161 if (n == 0) {
172 (*block)(ivec, ecount_buf, key); 162 (*block)(ivec, ecount_buf, key);
@@ -185,8 +175,8 @@ LCRYPTO_ALIAS(CRYPTO_ctr128_encrypt);
185static void 175static void
186ctr96_inc(unsigned char *counter) 176ctr96_inc(unsigned char *counter)
187{ 177{
188 u32 n = 12; 178 uint32_t n = 12;
189 u8 c; 179 uint8_t c;
190 180
191 do { 181 do {
192 --n; 182 --n;
@@ -204,11 +194,10 @@ CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
204 unsigned char ivec[16], unsigned char ecount_buf[16], 194 unsigned char ivec[16], unsigned char ecount_buf[16],
205 unsigned int *num, ctr128_f func) 195 unsigned int *num, ctr128_f func)
206{ 196{
207 unsigned int n, ctr32; 197 unsigned int n = *num;
198 unsigned int ctr32;
208 199
209 assert(*num < 16); 200 OPENSSL_assert(n < 16);
210
211 n = *num;
212 201
213 while (n && len) { 202 while (n && len) {
214 *(out++) = *(in++) ^ ecount_buf[n]; 203 *(out++) = *(in++) ^ ecount_buf[n];
@@ -216,7 +205,8 @@ CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
216 n = (n + 1) % 16; 205 n = (n + 1) % 16;
217 } 206 }
218 207
219 ctr32 = GETU32(ivec + 12); 208 ctr32 = crypto_load_be32toh(&ivec[12]);
209
220 while (len >= 16) { 210 while (len >= 16) {
221 size_t blocks = len/16; 211 size_t blocks = len/16;
222 /* 212 /*
@@ -233,14 +223,14 @@ CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
233 * overflow, which is then handled by limiting the 223 * overflow, which is then handled by limiting the
234 * amount of blocks to the exact overflow point... 224 * amount of blocks to the exact overflow point...
235 */ 225 */
236 ctr32 += (u32)blocks; 226 ctr32 += (uint32_t)blocks;
237 if (ctr32 < blocks) { 227 if (ctr32 < blocks) {
238 blocks -= ctr32; 228 blocks -= ctr32;
239 ctr32 = 0; 229 ctr32 = 0;
240 } 230 }
241 (*func)(in, out, blocks, key, ivec); 231 (*func)(in, out, blocks, key, ivec);
242 /* (*ctr) does not update ivec, caller does: */ 232 /* (*ctr) does not update ivec, caller does: */
243 PUTU32(ivec + 12, ctr32); 233 crypto_store_htobe32(&ivec[12], ctr32);
244 /* ... overflow was detected, propagate carry. */ 234 /* ... overflow was detected, propagate carry. */
245 if (ctr32 == 0) 235 if (ctr32 == 0)
246 ctr96_inc(ivec); 236 ctr96_inc(ivec);
@@ -253,7 +243,7 @@ CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
253 memset(ecount_buf, 0, 16); 243 memset(ecount_buf, 0, 16);
254 (*func)(ecount_buf, ecount_buf, 1, key, ivec); 244 (*func)(ecount_buf, ecount_buf, 1, key, ivec);
255 ++ctr32; 245 ++ctr32;
256 PUTU32(ivec + 12, ctr32); 246 crypto_store_htobe32(&ivec[12], ctr32);
257 if (ctr32 == 0) 247 if (ctr32 == 0)
258 ctr96_inc(ivec); 248 ctr96_inc(ivec);
259 while (len--) { 249 while (len--) {
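The interesting part of the ctr128.c diff is logic the rewrite leaves intact: CRYPTO_ctr128_encrypt_ctr32 treats ivec[12..15] as a big-endian 32-bit block counter, and when adding `blocks` would wrap it, the run is split at the wrap point and the carry propagates into the upper 96 bits via ctr96_inc. A small standalone model of just that arithmetic (ctr96_inc_demo mirrors the library's ctr96_inc; names and values are illustrative):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors ctr96_inc(): propagate a carry through ivec[11]..ivec[0]. */
static void
ctr96_inc_demo(unsigned char *counter)
{
	uint32_t n = 12;

	do {
		--n;
		if (++counter[n] != 0)
			return;
	} while (n);
}

int
main(void)
{
	unsigned char ivec[16] = { 0 };
	uint32_t ctr32 = 0xfffffffeU;	/* two blocks before the wrap */
	size_t blocks = 5;

	ctr32 += (uint32_t)blocks;
	if (ctr32 < blocks) {
		/* Wrapped: only blocks - ctr32 fit before the carry. */
		blocks -= ctr32;
		ctr32 = 0;
	}
	printf("process %zu blocks this pass\n", blocks);	/* prints 2 */
	if (ctr32 == 0)
		ctr96_inc_demo(ivec);	/* carry into ivec[0..11] */
	return 0;
}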
diff --git a/src/lib/libcrypto/modes/gcm128.c b/src/lib/libcrypto/modes/gcm128.c
index 6c89bd44b7..b6874296e0 100644
--- a/src/lib/libcrypto/modes/gcm128.c
+++ b/src/lib/libcrypto/modes/gcm128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: gcm128.c,v 1.27 2024/09/06 09:57:32 tb Exp $ */ 1/* $OpenBSD: gcm128.c,v 1.54 2025/06/28 12:39:10 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -48,8 +48,6 @@
48 * ==================================================================== 48 * ====================================================================
49 */ 49 */
50 50
51#define OPENSSL_FIPSAPI
52
53#include <string.h> 51#include <string.h>
54 52
55#include <openssl/crypto.h> 53#include <openssl/crypto.h>
@@ -57,284 +55,54 @@
57#include "crypto_internal.h" 55#include "crypto_internal.h"
58#include "modes_local.h" 56#include "modes_local.h"
59 57
60#ifndef MODES_DEBUG 58void
61# ifndef NDEBUG 59gcm_init_4bit(u128 Htable[16], uint64_t H[2])
62# define NDEBUG
63# endif
64#endif
65
66#if defined(BSWAP4) && defined(__STRICT_ALIGNMENT)
67/* redefine, because alignment is ensured */
68#undef GETU32
69#define GETU32(p) BSWAP4(*(const u32 *)(p))
70#endif
71
72#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
73#define REDUCE1BIT(V) \
74 do { \
75 if (sizeof(size_t)==8) { \
76 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
77 V.lo = (V.hi<<63)|(V.lo>>1); \
78 V.hi = (V.hi>>1 )^T; \
79 } else { \
80 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
81 V.lo = (V.hi<<63)|(V.lo>>1); \
82 V.hi = (V.hi>>1 )^((u64)T<<32); \
83 } \
84 } while(0)
85
86/*
87 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
88 * never be set to 8. 8 is effectively reserved for testing purposes.
89 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
90 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
91 * whole spectrum of possible table driven implementations. Why? In
92 * non-"Shoup's" case memory access pattern is segmented in such manner,
93 * that it's trivial to see that cache timing information can reveal
94 * fair portion of intermediate hash value. Given that ciphertext is
95 * always available to attacker, it's possible for him to attempt to
96 * deduce secret parameter H and if successful, tamper with messages
97 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
98 * not as trivial, but there is no reason to believe that it's resistant
99 * to cache-timing attack. And the thing about "8-bit" implementation is
100 * that it consumes 16 (sixteen) times more memory, 4KB per individual
101 * key + 1KB shared. Well, on pros side it should be twice as fast as
102 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
103 * was observed to run ~75% faster, closer to 100% for commercial
104 * compilers... Yet "4-bit" procedure is preferred, because it's
105 * believed to provide better security-performance balance and adequate
106 * all-round performance. "All-round" refers to things like:
107 *
108 * - shorter setup time effectively improves overall timing for
109 * handling short messages;
110 * - larger table allocation can become unbearable because of VM
111 * subsystem penalties (for example on Windows large enough free
112 * results in VM working set trimming, meaning that consequent
113 * malloc would immediately incur working set expansion);
114 * - larger table has larger cache footprint, which can affect
115 * performance of other code paths (not necessarily even from same
116 * thread in Hyper-Threading world);
117 *
118 * Value of 1 is not appropriate for performance reasons.
119 */
120#if TABLE_BITS==8
121
122static void
123gcm_init_8bit(u128 Htable[256], u64 H[2])
124{
125 int i, j;
126 u128 V;
127
128 Htable[0].hi = 0;
129 Htable[0].lo = 0;
130 V.hi = H[0];
131 V.lo = H[1];
132
133 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
134 REDUCE1BIT(V);
135 Htable[i] = V;
136 }
137
138 for (i = 2; i < 256; i <<= 1) {
139 u128 *Hi = Htable + i, H0 = *Hi;
140 for (j = 1; j < i; ++j) {
141 Hi[j].hi = H0.hi ^ Htable[j].hi;
142 Hi[j].lo = H0.lo ^ Htable[j].lo;
143 }
144 }
145}
146
147static void
148gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
149{
150 u128 Z = { 0, 0};
151 const u8 *xi = (const u8 *)Xi + 15;
152 size_t rem, n = *xi;
153 static const size_t rem_8bit[256] = {
154 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
155 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
156 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
157 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
158 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
159 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
160 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
161 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
162 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
163 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
164 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
165 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
166 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
167 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
168 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
169 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
170 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
171 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
172 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
173 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
174 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
175 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
176 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
177 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
178 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
179 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
180 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
181 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
182 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
183 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
184 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
185 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
186 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
187 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
188 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
189 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
190 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
191 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
192 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
193 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
194 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
195 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
196 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
197 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
198 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
199 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
200 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
201 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
202 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
203 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
204 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
205 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
206 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
207 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
208 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
209 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
210 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
211 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
212 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
213 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
214 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
215 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
216 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
217 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
218
219 while (1) {
220 Z.hi ^= Htable[n].hi;
221 Z.lo ^= Htable[n].lo;
222
223 if ((u8 *)Xi == xi)
224 break;
225
226 n = *(--xi);
227
228 rem = (size_t)Z.lo & 0xff;
229 Z.lo = (Z.hi << 56)|(Z.lo >> 8);
230 Z.hi = (Z.hi >> 8);
231#if SIZE_MAX == 0xffffffffffffffff
232 Z.hi ^= rem_8bit[rem];
233#else
234 Z.hi ^= (u64)rem_8bit[rem] << 32;
235#endif
236 }
237
238 Xi[0] = htobe64(Z.hi);
239 Xi[1] = htobe64(Z.lo);
240}
241#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
242
243#elif TABLE_BITS==4
244
245static void
246gcm_init_4bit(u128 Htable[16], u64 H[2])
247{ 60{
248 u128 V; 61 u128 V;
249#if defined(OPENSSL_SMALL_FOOTPRINT) 62 uint64_t T;
250 int i; 63 int i;
251#endif
252 64
253 Htable[0].hi = 0; 65 Htable[0].hi = 0;
254 Htable[0].lo = 0; 66 Htable[0].lo = 0;
255 V.hi = H[0]; 67 V.hi = H[0];
256 V.lo = H[1]; 68 V.lo = H[1];
257 69
258#if defined(OPENSSL_SMALL_FOOTPRINT)
259 for (Htable[8] = V, i = 4; i > 0; i >>= 1) { 70 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
260 REDUCE1BIT(V); 71 T = U64(0xe100000000000000) & (0 - (V.lo & 1));
72 V.lo = (V.hi << 63) | (V.lo >> 1);
73 V.hi = (V.hi >> 1 ) ^ T;
261 Htable[i] = V; 74 Htable[i] = V;
262 } 75 }
263 76
264 for (i = 2; i < 16; i <<= 1) { 77 for (i = 2; i < 16; i <<= 1) {
265 u128 *Hi = Htable + i; 78 u128 *Hi = Htable + i;
266 int j; 79 int j;
267 for (V = *Hi, j = 1; j < i; ++j) { 80 for (V = *Hi, j = 1; j < i; j++) {
268 Hi[j].hi = V.hi ^ Htable[j].hi; 81 Hi[j].hi = V.hi ^ Htable[j].hi;
269 Hi[j].lo = V.lo ^ Htable[j].lo; 82 Hi[j].lo = V.lo ^ Htable[j].lo;
270 } 83 }
271 } 84 }
272#else
273 Htable[8] = V;
274 REDUCE1BIT(V);
275 Htable[4] = V;
276 REDUCE1BIT(V);
277 Htable[2] = V;
278 REDUCE1BIT(V);
279 Htable[1] = V;
280 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
281 V = Htable[4];
282 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
283 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
284 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
285 V = Htable[8];
286 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
287 Htable[10].hi = V.hi ^ Htable[2].hi,
288 Htable[10].lo = V.lo ^ Htable[2].lo;
289 Htable[11].hi = V.hi ^ Htable[3].hi,
290 Htable[11].lo = V.lo ^ Htable[3].lo;
291 Htable[12].hi = V.hi ^ Htable[4].hi,
292 Htable[12].lo = V.lo ^ Htable[4].lo;
293 Htable[13].hi = V.hi ^ Htable[5].hi,
294 Htable[13].lo = V.lo ^ Htable[5].lo;
295 Htable[14].hi = V.hi ^ Htable[6].hi,
296 Htable[14].lo = V.lo ^ Htable[6].lo;
297 Htable[15].hi = V.hi ^ Htable[7].hi,
298 Htable[15].lo = V.lo ^ Htable[7].lo;
299#endif
300#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
301 /*
302 * ARM assembler expects specific dword order in Htable.
303 */
304 {
305 int j;
306#if BYTE_ORDER == LITTLE_ENDIAN
307 for (j = 0; j < 16; ++j) {
308 V = Htable[j];
309 Htable[j].hi = V.lo;
310 Htable[j].lo = V.hi;
311 }
312#else /* BIG_ENDIAN */
313 for (j = 0; j < 16; ++j) {
314 V = Htable[j];
315 Htable[j].hi = V.lo << 32|V.lo >> 32;
316 Htable[j].lo = V.hi << 32|V.hi >> 32;
317 }
318#endif
319 }
320#endif
321} 85}
322 86
323#ifndef GHASH_ASM 87#ifdef GHASH_ASM
324static const size_t rem_4bit[16] = { 88void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
325 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), 89void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
326 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), 90 size_t len);
327 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), 91
328 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) }; 92#else
93static const uint16_t rem_4bit[16] = {
94 0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
95 0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
96};
329 97
330static void 98static void
331gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) 99gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16])
332{ 100{
333 u128 Z; 101 u128 Z;
334 int cnt = 15; 102 int cnt = 15;
335 size_t rem, nlo, nhi; 103 size_t rem, nlo, nhi;
336 104
337 nlo = ((const u8 *)Xi)[15]; 105 nlo = ((const uint8_t *)Xi)[15];
338 nhi = nlo >> 4; 106 nhi = nlo >> 4;
339 nlo &= 0xf; 107 nlo &= 0xf;
340 108
@@ -345,29 +113,21 @@ gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
345 rem = (size_t)Z.lo & 0xf; 113 rem = (size_t)Z.lo & 0xf;
346 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 114 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
347 Z.hi = (Z.hi >> 4); 115 Z.hi = (Z.hi >> 4);
348#if SIZE_MAX == 0xffffffffffffffff 116 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
349 Z.hi ^= rem_4bit[rem];
350#else
351 Z.hi ^= (u64)rem_4bit[rem] << 32;
352#endif
353 Z.hi ^= Htable[nhi].hi; 117 Z.hi ^= Htable[nhi].hi;
354 Z.lo ^= Htable[nhi].lo; 118 Z.lo ^= Htable[nhi].lo;
355 119
356 if (--cnt < 0) 120 if (--cnt < 0)
357 break; 121 break;
358 122
359 nlo = ((const u8 *)Xi)[cnt]; 123 nlo = ((const uint8_t *)Xi)[cnt];
360 nhi = nlo >> 4; 124 nhi = nlo >> 4;
361 nlo &= 0xf; 125 nlo &= 0xf;
362 126
363 rem = (size_t)Z.lo & 0xf; 127 rem = (size_t)Z.lo & 0xf;
364 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 128 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
365 Z.hi = (Z.hi >> 4); 129 Z.hi = (Z.hi >> 4);
366#if SIZE_MAX == 0xffffffffffffffff 130 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
367 Z.hi ^= rem_4bit[rem];
368#else
369 Z.hi ^= (u64)rem_4bit[rem] << 32;
370#endif
371 Z.hi ^= Htable[nlo].hi; 131 Z.hi ^= Htable[nlo].hi;
372 Z.lo ^= Htable[nlo].lo; 132 Z.lo ^= Htable[nlo].lo;
373 } 133 }
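One detail of the gcm_gmult_4bit rewrite visible above: rem_4bit shrinks from PACK()-ed size_t entries to plain uint16_t, and the old 64-bit path (values pre-shifted into the top 16 bits) and 32-bit path (pre-shift by 16, then `<< 32` at use) both collapse into the single `(uint64_t)rem_4bit[rem] << 48`. A quick equivalence check on the first nonzero entry (a sketch, not library code):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	/* Old 64-bit PACK(s): (size_t)s << (sizeof(size_t)*8 - 16). */
	uint64_t old64 = (uint64_t)0x1c20 << (64 - 16);
	/* Old 32-bit path: PACK pre-shifts by 16, code shifts by 32. */
	uint64_t old32 = (uint64_t)((uint32_t)0x1c20 << 16) << 32;
	/* New unified form, from a uint16_t table entry. */
	uint64_t unified = (uint64_t)(uint16_t)0x1c20 << 48;

	assert(old64 == unified && old32 == unified);
	return 0;
}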
@@ -376,26 +136,17 @@ gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
376 Xi[1] = htobe64(Z.lo); 136 Xi[1] = htobe64(Z.lo);
377} 137}
378 138
379#if !defined(OPENSSL_SMALL_FOOTPRINT)
380/*
381 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
382 * details... Compiler-generated code doesn't seem to give any
383 * performance improvement, at least not on x86[_64]. It's here
384 * mostly as reference and a placeholder for possible future
385 * non-trivial optimization[s]...
386 */
387static void 139static void
388gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], 140gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
389 const u8 *inp, size_t len) 141 const uint8_t *inp, size_t len)
390{ 142{
391 u128 Z; 143 u128 Z;
392 int cnt; 144 int cnt;
393 size_t rem, nlo, nhi; 145 size_t rem, nlo, nhi;
394 146
395#if 1
396 do { 147 do {
397 cnt = 15; 148 cnt = 15;
398 nlo = ((const u8 *)Xi)[15]; 149 nlo = ((const uint8_t *)Xi)[15];
399 nlo ^= inp[15]; 150 nlo ^= inp[15];
400 nhi = nlo >> 4; 151 nhi = nlo >> 4;
401 nlo &= 0xf; 152 nlo &= 0xf;
@@ -407,18 +158,14 @@ gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
407 rem = (size_t)Z.lo & 0xf; 158 rem = (size_t)Z.lo & 0xf;
408 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 159 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
409 Z.hi = (Z.hi >> 4); 160 Z.hi = (Z.hi >> 4);
410#if SIZE_MAX == 0xffffffffffffffff 161 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
411 Z.hi ^= rem_4bit[rem];
412#else
413 Z.hi ^= (u64)rem_4bit[rem] << 32;
414#endif
415 Z.hi ^= Htable[nhi].hi; 162 Z.hi ^= Htable[nhi].hi;
416 Z.lo ^= Htable[nhi].lo; 163 Z.lo ^= Htable[nhi].lo;
417 164
418 if (--cnt < 0) 165 if (--cnt < 0)
419 break; 166 break;
420 167
421 nlo = ((const u8 *)Xi)[cnt]; 168 nlo = ((const uint8_t *)Xi)[cnt];
422 nlo ^= inp[cnt]; 169 nlo ^= inp[cnt];
423 nhi = nlo >> 4; 170 nhi = nlo >> 4;
424 nlo &= 0xf; 171 nlo &= 0xf;
@@ -426,222 +173,40 @@ gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
426 rem = (size_t)Z.lo & 0xf; 173 rem = (size_t)Z.lo & 0xf;
427 Z.lo = (Z.hi << 60)|(Z.lo >> 4); 174 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
428 Z.hi = (Z.hi >> 4); 175 Z.hi = (Z.hi >> 4);
429#if SIZE_MAX == 0xffffffffffffffff 176 Z.hi ^= (uint64_t)rem_4bit[rem] << 48;
430 Z.hi ^= rem_4bit[rem];
431#else
432 Z.hi ^= (u64)rem_4bit[rem] << 32;
433#endif
434 Z.hi ^= Htable[nlo].hi; 177 Z.hi ^= Htable[nlo].hi;
435 Z.lo ^= Htable[nlo].lo; 178 Z.lo ^= Htable[nlo].lo;
436 } 179 }
437#else
438 /*
439 * Extra 256+16 bytes per-key plus 512 bytes shared tables
440 * [should] give ~50% improvement... One could have PACK()-ed
441 * the rem_8bit even here, but the priority is to minimize
442 * cache footprint...
443 */
444 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
445 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
446 static const unsigned short rem_8bit[256] = {
447 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
448 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
449 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
450 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
451 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
452 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
453 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
454 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
455 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
456 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
457 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
458 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
459 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
460 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
461 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
462 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
463 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
464 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
465 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
466 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
467 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
468 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
469 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
470 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
471 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
472 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
473 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
474 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
475 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
476 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
477 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
478 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
479 /*
480 * This pre-processing phase slows down procedure by approximately
481 * same time as it makes each loop spin faster. In other words
482 * single block performance is approximately same as straightforward
483 * "4-bit" implementation, and then it goes only faster...
484 */
485 for (cnt = 0; cnt < 16; ++cnt) {
486 Z.hi = Htable[cnt].hi;
487 Z.lo = Htable[cnt].lo;
488 Hshr4[cnt].lo = (Z.hi << 60)|(Z.lo >> 4);
489 Hshr4[cnt].hi = (Z.hi >> 4);
490 Hshl4[cnt] = (u8)(Z.lo << 4);
491 }
492
493 do {
494 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
495 nlo = ((const u8 *)Xi)[cnt];
496 nlo ^= inp[cnt];
497 nhi = nlo >> 4;
498 nlo &= 0xf;
499
500 Z.hi ^= Htable[nlo].hi;
501 Z.lo ^= Htable[nlo].lo;
502
503 rem = (size_t)Z.lo & 0xff;
504
505 Z.lo = (Z.hi << 56)|(Z.lo >> 8);
506 Z.hi = (Z.hi >> 8);
507
508 Z.hi ^= Hshr4[nhi].hi;
509 Z.lo ^= Hshr4[nhi].lo;
510 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
511 }
512
513 nlo = ((const u8 *)Xi)[0];
514 nlo ^= inp[0];
515 nhi = nlo >> 4;
516 nlo &= 0xf;
517
518 Z.hi ^= Htable[nlo].hi;
519 Z.lo ^= Htable[nlo].lo;
520
521 rem = (size_t)Z.lo & 0xf;
522
523 Z.lo = (Z.hi << 60)|(Z.lo >> 4);
524 Z.hi = (Z.hi >> 4);
525
526 Z.hi ^= Htable[nhi].hi;
527 Z.lo ^= Htable[nhi].lo;
528 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
529#endif
530 180
531 Xi[0] = htobe64(Z.hi); 181 Xi[0] = htobe64(Z.hi);
532 Xi[1] = htobe64(Z.lo); 182 Xi[1] = htobe64(Z.lo);
533 } while (inp += 16, len -= 16); 183 } while (inp += 16, len -= 16);
534} 184}
535#endif 185#endif
536#else
537void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
538void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
539 size_t len);
540#endif
541
542#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
543#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
544#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
545/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
546 * trashing effect. In other words idea is to hash data while it's
547 * still in L1 cache after encryption pass... */
548#define GHASH_CHUNK (3*1024)
549#endif
550 186
551#else /* TABLE_BITS */ 187static inline void
188gcm_mul(GCM128_CONTEXT *ctx, uint64_t u[2])
189{
190 ctx->gmult(u, ctx->Htable);
191}
552 192
553static void 193static inline void
554gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) 194gcm_ghash(GCM128_CONTEXT *ctx, const uint8_t *in, size_t len)
555{ 195{
556 u128 V, Z = { 0,0 }; 196 ctx->ghash(ctx->Xi.u, ctx->Htable, in, len);
557 long X; 197}
558 int i, j;
559 const long *xi = (const long *)Xi;
560 198
561 V.hi = H[0]; /* H is in host byte order, no byte swapping */ 199#ifdef HAVE_GCM128_INIT
562 V.lo = H[1]; 200void gcm128_init(GCM128_CONTEXT *ctx);
563 201
564 for (j = 0; j < 16/sizeof(long); ++j) {
565#if BYTE_ORDER == LITTLE_ENDIAN
566#if SIZE_MAX == 0xffffffffffffffff
567#ifdef BSWAP8
568 X = (long)(BSWAP8(xi[j]));
569#else 202#else
570 const u8 *p = (const u8 *)(xi + j); 203static void
571 X = (long)((u64)GETU32(p) << 32|GETU32(p + 4)); 204gcm128_init(GCM128_CONTEXT *ctx)
572#endif 205{
573#else 206 gcm_init_4bit(ctx->Htable, ctx->H.u);
574 const u8 *p = (const u8 *)(xi + j); 207 ctx->gmult = gcm_gmult_4bit;
575 X = (long)GETU32(p); 208 ctx->ghash = gcm_ghash_4bit;
576#endif
577#else /* BIG_ENDIAN */
578 X = xi[j];
579#endif
580
581 for (i = 0; i < 8*sizeof(long); ++i, X <<= 1) {
582 u64 M = (u64)(X >> (8*sizeof(long) - 1));
583 Z.hi ^= V.hi & M;
584 Z.lo ^= V.lo & M;
585
586 REDUCE1BIT(V);
587 }
588 }
589
590 Xi[0] = htobe64(Z.hi);
591 Xi[1] = htobe64(Z.lo);
592} 209}
593#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
594
595#endif
596
597#if defined(GHASH_ASM) && \
598 (defined(__i386) || defined(__i386__) || \
599 defined(__x86_64) || defined(__x86_64__) || \
600 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
601#include "x86_arch.h"
602#endif
603
604#if TABLE_BITS==4 && defined(GHASH_ASM)
605# if (defined(__i386) || defined(__i386__) || \
606 defined(__x86_64) || defined(__x86_64__) || \
607 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
608# define GHASH_ASM_X86_OR_64
609# define GCM_FUNCREF_4BIT
610
611void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
612void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
613void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
614 size_t len);
615
616# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
617# define GHASH_ASM_X86
618void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
619void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
620 size_t len);
621
622void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
623void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
624 size_t len);
625# endif
626# elif defined(__arm__) || defined(__arm)
627# include "arm_arch.h"
628# if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
629# define GHASH_ASM_ARM
630# define GCM_FUNCREF_4BIT
631void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
632void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
633 size_t len);
634# endif
635# endif
636#endif
637
638#ifdef GCM_FUNCREF_4BIT
639# undef GCM_MUL
640# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
641# ifdef GHASH
642# undef GHASH
643# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
644# endif
645#endif 210#endif
646 211
647void 212void
@@ -657,60 +222,35 @@ CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
657 ctx->H.u[0] = be64toh(ctx->H.u[0]); 222 ctx->H.u[0] = be64toh(ctx->H.u[0]);
658 ctx->H.u[1] = be64toh(ctx->H.u[1]); 223 ctx->H.u[1] = be64toh(ctx->H.u[1]);
659 224
660#if TABLE_BITS==8 225 gcm128_init(ctx);
661 gcm_init_8bit(ctx->Htable, ctx->H.u);
662#elif TABLE_BITS==4
663# if defined(GHASH_ASM_X86_OR_64)
664# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
665 /* check FXSR and PCLMULQDQ bits */
666 if ((crypto_cpu_caps_ia32() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) ==
667 (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) {
668 gcm_init_clmul(ctx->Htable, ctx->H.u);
669 ctx->gmult = gcm_gmult_clmul;
670 ctx->ghash = gcm_ghash_clmul;
671 return;
672 }
673# endif
674 gcm_init_4bit(ctx->Htable, ctx->H.u);
675# if defined(GHASH_ASM_X86) /* x86 only */
676# if defined(OPENSSL_IA32_SSE2)
677 if (crypto_cpu_caps_ia32() & CPUCAP_MASK_SSE) { /* check SSE bit */
678# else
679 if (crypto_cpu_caps_ia32() & CPUCAP_MASK_MMX) { /* check MMX bit */
680# endif
681 ctx->gmult = gcm_gmult_4bit_mmx;
682 ctx->ghash = gcm_ghash_4bit_mmx;
683 } else {
684 ctx->gmult = gcm_gmult_4bit_x86;
685 ctx->ghash = gcm_ghash_4bit_x86;
686 }
687# else
688 ctx->gmult = gcm_gmult_4bit;
689 ctx->ghash = gcm_ghash_4bit;
690# endif
691# elif defined(GHASH_ASM_ARM)
692 if (OPENSSL_armcap_P & ARMV7_NEON) {
693 ctx->gmult = gcm_gmult_neon;
694 ctx->ghash = gcm_ghash_neon;
695 } else {
696 gcm_init_4bit(ctx->Htable, ctx->H.u);
697 ctx->gmult = gcm_gmult_4bit;
698 ctx->ghash = gcm_ghash_4bit;
699 }
700# else
701 gcm_init_4bit(ctx->Htable, ctx->H.u);
702# endif
703#endif
704} 226}
705LCRYPTO_ALIAS(CRYPTO_gcm128_init); 227LCRYPTO_ALIAS(CRYPTO_gcm128_init);
706 228
229GCM128_CONTEXT *
230CRYPTO_gcm128_new(void *key, block128_f block)
231{
232 GCM128_CONTEXT *ctx;
233
234 if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
235 return NULL;
236
237 CRYPTO_gcm128_init(ctx, key, block);
238
239 return ctx;
240}
241LCRYPTO_ALIAS(CRYPTO_gcm128_new);
242
243void
244CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
245{
246 freezero(ctx, sizeof(*ctx));
247}
248LCRYPTO_ALIAS(CRYPTO_gcm128_release);
249
707void 250void
708CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len) 251CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len)
709{ 252{
710 unsigned int ctr; 253 unsigned int ctr;
711#ifdef GCM_FUNCREF_4BIT
712 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
713#endif
714 254
715 ctx->Yi.u[0] = 0; 255 ctx->Yi.u[0] = 0;
716 ctx->Yi.u[1] = 0; 256 ctx->Yi.u[1] = 0;
@@ -727,577 +267,277 @@ CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len)
727 ctr = 1; 267 ctr = 1;
728 } else { 268 } else {
729 size_t i; 269 size_t i;
730 u64 len0 = len; 270 uint64_t len0 = len;
731 271
732 while (len >= 16) { 272 while (len >= 16) {
733 for (i = 0; i < 16; ++i) 273 for (i = 0; i < 16; i++)
734 ctx->Yi.c[i] ^= iv[i]; 274 ctx->Yi.c[i] ^= iv[i];
735 GCM_MUL(ctx, Yi); 275 gcm_mul(ctx, ctx->Yi.u);
736 iv += 16; 276 iv += 16;
737 len -= 16; 277 len -= 16;
738 } 278 }
739 if (len) { 279 if (len > 0) {
740 for (i = 0; i < len; ++i) 280 for (i = 0; i < len; i++)
741 ctx->Yi.c[i] ^= iv[i]; 281 ctx->Yi.c[i] ^= iv[i];
742 GCM_MUL(ctx, Yi); 282 gcm_mul(ctx, ctx->Yi.u);
743 } 283 }
744 len0 <<= 3; 284 len0 <<= 3;
745 ctx->Yi.u[1] ^= htobe64(len0); 285 ctx->Yi.u[1] ^= htobe64(len0);
746 286
747 GCM_MUL(ctx, Yi); 287 gcm_mul(ctx, ctx->Yi.u);
748 288
749 ctr = be32toh(ctx->Yi.d[3]); 289 ctr = be32toh(ctx->Yi.d[3]);
750 } 290 }
751 291
752 (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key); 292 (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
753 ++ctr; 293 ctx->Yi.d[3] = htobe32(++ctr);
754 ctx->Yi.d[3] = htobe32(ctr);
755} 294}
756LCRYPTO_ALIAS(CRYPTO_gcm128_setiv); 295LCRYPTO_ALIAS(CRYPTO_gcm128_setiv);
757 296
758int 297int
759CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, size_t len) 298CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, size_t len)
760{ 299{
761 size_t i;
762 unsigned int n; 300 unsigned int n;
763 u64 alen = ctx->len.u[0]; 301 uint64_t alen;
764#ifdef GCM_FUNCREF_4BIT 302 size_t i;
765 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
766# ifdef GHASH
767 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
768 const u8 *inp, size_t len) = ctx->ghash;
769# endif
770#endif
771 303
772 if (ctx->len.u[1]) 304 if (ctx->len.u[1] != 0)
773 return -2; 305 return -2;
774 306
775 alen += len; 307 alen = ctx->len.u[0] + len;
776 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len)) 308 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
777 return -1; 309 return -1;
778 ctx->len.u[0] = alen; 310 ctx->len.u[0] = alen;
779 311
780 n = ctx->ares; 312 if ((n = ctx->ares) > 0) {
781 if (n) { 313 while (n > 0 && len > 0) {
782 while (n && len) {
783 ctx->Xi.c[n] ^= *(aad++); 314 ctx->Xi.c[n] ^= *(aad++);
784 --len;
785 n = (n + 1) % 16; 315 n = (n + 1) % 16;
316 len--;
786 } 317 }
787 if (n == 0) 318 if (n > 0) {
788 GCM_MUL(ctx, Xi);
789 else {
790 ctx->ares = n; 319 ctx->ares = n;
791 return 0; 320 return 0;
792 } 321 }
322 gcm_mul(ctx, ctx->Xi.u);
793 } 323 }
794 324
795#ifdef GHASH 325 if ((i = (len & (size_t)-16)) > 0) {
796 if ((i = (len & (size_t)-16))) { 326 gcm_ghash(ctx, aad, i);
797 GHASH(ctx, aad, i);
798 aad += i; 327 aad += i;
799 len -= i; 328 len -= i;
800 } 329 }
801#else 330 if (len > 0) {
802 while (len >= 16) {
803 for (i = 0; i < 16; ++i)
804 ctx->Xi.c[i] ^= aad[i];
805 GCM_MUL(ctx, Xi);
806 aad += 16;
807 len -= 16;
808 }
809#endif
810 if (len) {
811 n = (unsigned int)len; 331 n = (unsigned int)len;
812 for (i = 0; i < len; ++i) 332 for (i = 0; i < len; i++)
813 ctx->Xi.c[i] ^= aad[i]; 333 ctx->Xi.c[i] ^= aad[i];
814 } 334 }
815
816 ctx->ares = n; 335 ctx->ares = n;
336
817 return 0; 337 return 0;
818} 338}
819LCRYPTO_ALIAS(CRYPTO_gcm128_aad); 339LCRYPTO_ALIAS(CRYPTO_gcm128_aad);
820 340
821int 341int
822CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, 342CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
823 const unsigned char *in, unsigned char *out, 343 unsigned char *out, size_t len)
824 size_t len)
825{ 344{
826 unsigned int n, ctr; 345 unsigned int n, ctr;
346 uint64_t mlen;
827 size_t i; 347 size_t i;
828 u64 mlen = ctx->len.u[1];
829 block128_f block = ctx->block;
830 void *key = ctx->key;
831#ifdef GCM_FUNCREF_4BIT
832 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
833# ifdef GHASH
834 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
835 const u8 *inp, size_t len) = ctx->ghash;
836# endif
837#endif
838 348
839 mlen += len; 349 mlen = ctx->len.u[1] + len;
840 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 350 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
841 return -1; 351 return -1;
842 ctx->len.u[1] = mlen; 352 ctx->len.u[1] = mlen;
843 353
844 if (ctx->ares) { 354 if (ctx->ares > 0) {
845 /* First call to encrypt finalizes GHASH(AAD) */ 355 /* First call to encrypt finalizes GHASH(AAD) */
846 GCM_MUL(ctx, Xi); 356 gcm_mul(ctx, ctx->Xi.u);
847 ctx->ares = 0; 357 ctx->ares = 0;
848 } 358 }
849 359
850 ctr = be32toh(ctx->Yi.d[3]); 360 ctr = be32toh(ctx->Yi.d[3]);
851 361
852 n = ctx->mres; 362 n = ctx->mres;
853#if !defined(OPENSSL_SMALL_FOOTPRINT)
854 if (16 % sizeof(size_t) == 0)
855 do { /* always true actually */
856 if (n) {
857 while (n && len) {
858 ctx->Xi.c[n] ^= *(out++) = *(in++) ^
859 ctx->EKi.c[n];
860 --len;
861 n = (n + 1) % 16;
862 }
863 if (n == 0)
864 GCM_MUL(ctx, Xi);
865 else {
866 ctx->mres = n;
867 return 0;
868 }
869 }
870#ifdef __STRICT_ALIGNMENT
871 if (((size_t)in|(size_t)out) % sizeof(size_t) != 0)
872 break;
873#endif
874#if defined(GHASH) && defined(GHASH_CHUNK)
875 while (len >= GHASH_CHUNK) {
876 size_t j = GHASH_CHUNK;
877
878 while (j) {
879 size_t *out_t = (size_t *)out;
880 const size_t *in_t = (const size_t *)in;
881
882 (*block)(ctx->Yi.c, ctx->EKi.c, key);
883 ++ctr;
884 ctx->Yi.d[3] = htobe32(ctr);
885
886 for (i = 0; i < 16/sizeof(size_t); ++i)
887 out_t[i] = in_t[i] ^
888 ctx->EKi.t[i];
889 out += 16;
890 in += 16;
891 j -= 16;
892 }
893 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
894 len -= GHASH_CHUNK;
895 }
896 if ((i = (len & (size_t)-16))) {
897 size_t j = i;
898
899 while (len >= 16) {
900 size_t *out_t = (size_t *)out;
901 const size_t *in_t = (const size_t *)in;
902
903 (*block)(ctx->Yi.c, ctx->EKi.c, key);
904 ++ctr;
905 ctx->Yi.d[3] = htobe32(ctr);
906
907 for (i = 0; i < 16/sizeof(size_t); ++i)
908 out_t[i] = in_t[i] ^
909 ctx->EKi.t[i];
910 out += 16;
911 in += 16;
912 len -= 16;
913 }
914 GHASH(ctx, out - j, j);
915 }
916#else
917 while (len >= 16) {
918 size_t *out_t = (size_t *)out;
919 const size_t *in_t = (const size_t *)in;
920
921 (*block)(ctx->Yi.c, ctx->EKi.c, key);
922 ++ctr;
923 ctx->Yi.d[3] = htobe32(ctr);
924
925 for (i = 0; i < 16/sizeof(size_t); ++i)
926 ctx->Xi.t[i] ^=
927 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
928 GCM_MUL(ctx, Xi);
929 out += 16;
930 in += 16;
931 len -= 16;
932 }
933#endif
934 if (len) {
935 (*block)(ctx->Yi.c, ctx->EKi.c, key);
936 ++ctr;
937 ctx->Yi.d[3] = htobe32(ctr);
938
939 while (len--) {
940 ctx->Xi.c[n] ^= out[n] = in[n] ^
941 ctx->EKi.c[n];
942 ++n;
943 }
944 }
945 363
946 ctx->mres = n; 364 for (i = 0; i < len; i++) {
947 return 0;
948 } while (0);
949#endif
950 for (i = 0; i < len; ++i) {
951 if (n == 0) { 365 if (n == 0) {
952 (*block)(ctx->Yi.c, ctx->EKi.c, key); 366 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
953 ++ctr; 367 ctx->Yi.d[3] = htobe32(++ctr);
954 ctx->Yi.d[3] = htobe32(ctr);
955 } 368 }
956 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; 369 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
957 n = (n + 1) % 16; 370 n = (n + 1) % 16;
958 if (n == 0) 371 if (n == 0)
959 GCM_MUL(ctx, Xi); 372 gcm_mul(ctx, ctx->Xi.u);
960 } 373 }
961 374
962 ctx->mres = n; 375 ctx->mres = n;
376
963 return 0; 377 return 0;
964} 378}
965LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt); 379LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt);
966 380
967int 381int
968CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, 382CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
969 const unsigned char *in, unsigned char *out, 383 unsigned char *out, size_t len)
970 size_t len)
971{ 384{
972 unsigned int n, ctr; 385 unsigned int n, ctr;
386 uint64_t mlen;
387 uint8_t c;
973 size_t i; 388 size_t i;
974 u64 mlen = ctx->len.u[1];
975 block128_f block = ctx->block;
976 void *key = ctx->key;
977#ifdef GCM_FUNCREF_4BIT
978 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
979# ifdef GHASH
980 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
981 const u8 *inp, size_t len) = ctx->ghash;
982# endif
983#endif
984 389
985 mlen += len; 390 mlen = ctx->len.u[1] + len;
986 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 391 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
987 return -1; 392 return -1;
988 ctx->len.u[1] = mlen; 393 ctx->len.u[1] = mlen;
989 394
990 if (ctx->ares) { 395 if (ctx->ares) {
991 /* First call to decrypt finalizes GHASH(AAD) */ 396 /* First call to decrypt finalizes GHASH(AAD) */
992 GCM_MUL(ctx, Xi); 397 gcm_mul(ctx, ctx->Xi.u);
993 ctx->ares = 0; 398 ctx->ares = 0;
994 } 399 }
995 400
996 ctr = be32toh(ctx->Yi.d[3]); 401 ctr = be32toh(ctx->Yi.d[3]);
997 402
998 n = ctx->mres; 403 n = ctx->mres;
999#if !defined(OPENSSL_SMALL_FOOTPRINT)
1000 if (16 % sizeof(size_t) == 0)
1001 do { /* always true actually */
1002 if (n) {
1003 while (n && len) {
1004 u8 c = *(in++);
1005 *(out++) = c ^ ctx->EKi.c[n];
1006 ctx->Xi.c[n] ^= c;
1007 --len;
1008 n = (n + 1) % 16;
1009 }
1010 if (n == 0)
1011 GCM_MUL(ctx, Xi);
1012 else {
1013 ctx->mres = n;
1014 return 0;
1015 }
1016 }
1017#ifdef __STRICT_ALIGNMENT
1018 if (((size_t)in|(size_t)out) % sizeof(size_t) != 0)
1019 break;
1020#endif
1021#if defined(GHASH) && defined(GHASH_CHUNK)
1022 while (len >= GHASH_CHUNK) {
1023 size_t j = GHASH_CHUNK;
1024
1025 GHASH(ctx, in, GHASH_CHUNK);
1026 while (j) {
1027 size_t *out_t = (size_t *)out;
1028 const size_t *in_t = (const size_t *)in;
1029
1030 (*block)(ctx->Yi.c, ctx->EKi.c, key);
1031 ++ctr;
1032 ctx->Yi.d[3] = htobe32(ctr);
1033
1034 for (i = 0; i < 16/sizeof(size_t); ++i)
1035 out_t[i] = in_t[i] ^
1036 ctx->EKi.t[i];
1037 out += 16;
1038 in += 16;
1039 j -= 16;
1040 }
1041 len -= GHASH_CHUNK;
1042 }
1043 if ((i = (len & (size_t)-16))) {
1044 GHASH(ctx, in, i);
1045 while (len >= 16) {
1046 size_t *out_t = (size_t *)out;
1047 const size_t *in_t = (const size_t *)in;
1048
1049 (*block)(ctx->Yi.c, ctx->EKi.c, key);
1050 ++ctr;
1051 ctx->Yi.d[3] = htobe32(ctr);
1052
1053 for (i = 0; i < 16/sizeof(size_t); ++i)
1054 out_t[i] = in_t[i] ^
1055 ctx->EKi.t[i];
1056 out += 16;
1057 in += 16;
1058 len -= 16;
1059 }
1060 }
1061#else
1062 while (len >= 16) {
1063 size_t *out_t = (size_t *)out;
1064 const size_t *in_t = (const size_t *)in;
1065
1066 (*block)(ctx->Yi.c, ctx->EKi.c, key);
1067 ++ctr;
1068 ctx->Yi.d[3] = htobe32(ctr);
1069
1070 for (i = 0; i < 16/sizeof(size_t); ++i) {
1071 size_t c = in[i];
1072 out[i] = c ^ ctx->EKi.t[i];
1073 ctx->Xi.t[i] ^= c;
1074 }
1075 GCM_MUL(ctx, Xi);
1076 out += 16;
1077 in += 16;
1078 len -= 16;
1079 }
1080#endif
1081 if (len) {
1082 (*block)(ctx->Yi.c, ctx->EKi.c, key);
1083 ++ctr;
1084 ctx->Yi.d[3] = htobe32(ctr);
1085
1086 while (len--) {
1087 u8 c = in[n];
1088 ctx->Xi.c[n] ^= c;
1089 out[n] = c ^ ctx->EKi.c[n];
1090 ++n;
1091 }
1092 }
1093 404
1094 ctx->mres = n; 405 for (i = 0; i < len; i++) {
1095 return 0;
1096 } while (0);
1097#endif
1098 for (i = 0; i < len; ++i) {
1099 u8 c;
1100 if (n == 0) { 406 if (n == 0) {
1101 (*block)(ctx->Yi.c, ctx->EKi.c, key); 407 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
1102 ++ctr; 408 ctx->Yi.d[3] = htobe32(++ctr);
1103 ctx->Yi.d[3] = htobe32(ctr);
1104 } 409 }
1105 c = in[i]; 410 c = in[i];
1106 out[i] = c ^ ctx->EKi.c[n]; 411 out[i] = c ^ ctx->EKi.c[n];
1107 ctx->Xi.c[n] ^= c; 412 ctx->Xi.c[n] ^= c;
1108 n = (n + 1) % 16; 413 n = (n + 1) % 16;
1109 if (n == 0) 414 if (n == 0)
1110 GCM_MUL(ctx, Xi); 415 gcm_mul(ctx, ctx->Xi.u);
1111 } 416 }
1112 417
1113 ctx->mres = n; 418 ctx->mres = n;
419
1114 return 0; 420 return 0;
1115} 421}
1116LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt); 422LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt);
1117 423
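Together with CRYPTO_gcm128_aad() and CRYPTO_gcm128_finish(), the decrypt path supports the usual streaming pattern. A hedged usage sketch, assuming in-tree compilation where modes_local.h provides the full GCM128_CONTEXT definition for stack allocation, and eliding key-setup error checks:

#include <openssl/aes.h>
#include <openssl/modes.h>

/* Returns 0 when the tag verifies, non-zero otherwise. */
static int
gcm_open_sketch(const unsigned char key[16], const unsigned char iv[12],
    const unsigned char *aad, size_t aad_len, const unsigned char *ct,
    size_t ct_len, const unsigned char tag[16], unsigned char *pt)
{
	GCM128_CONTEXT ctx;
	AES_KEY aes;

	AES_set_encrypt_key(key, 128, &aes);	/* GCM only uses E_K */
	CRYPTO_gcm128_init(&ctx, &aes, (block128_f)AES_encrypt);
	CRYPTO_gcm128_setiv(&ctx, iv, 12);
	if (CRYPTO_gcm128_aad(&ctx, aad, aad_len) != 0)
		return -1;
	if (CRYPTO_gcm128_decrypt(&ctx, ct, pt, ct_len) != 0)
		return -1;

	return CRYPTO_gcm128_finish(&ctx, tag, 16);
}
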
1118int 424int
1119CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, 425CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const unsigned char *in,
1120 const unsigned char *in, unsigned char *out, 426 unsigned char *out, size_t len, ctr128_f stream)
1121 size_t len, ctr128_f stream)
1122{ 427{
1123 unsigned int n, ctr; 428 unsigned int n, ctr;
1124 size_t i; 429 uint64_t mlen;
1125 u64 mlen = ctx->len.u[1]; 430 size_t i, j;
1126 void *key = ctx->key;
1127#ifdef GCM_FUNCREF_4BIT
1128 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1129# ifdef GHASH
1130 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1131 const u8 *inp, size_t len) = ctx->ghash;
1132# endif
1133#endif
1134 431
1135 mlen += len; 432 mlen = ctx->len.u[1] + len;
1136 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 433 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1137 return -1; 434 return -1;
1138 ctx->len.u[1] = mlen; 435 ctx->len.u[1] = mlen;
1139 436
1140 if (ctx->ares) { 437 if (ctx->ares > 0) {
1141 /* First call to encrypt finalizes GHASH(AAD) */ 438 /* First call to encrypt finalizes GHASH(AAD) */
1142 GCM_MUL(ctx, Xi); 439 gcm_mul(ctx, ctx->Xi.u);
1143 ctx->ares = 0; 440 ctx->ares = 0;
1144 } 441 }
1145 442
1146 ctr = be32toh(ctx->Yi.d[3]); 443 ctr = be32toh(ctx->Yi.d[3]);
1147 444
1148 n = ctx->mres; 445 if ((n = ctx->mres) > 0) {
1149 if (n) { 446 while (n > 0 && len > 0) {
1150 while (n && len) {
1151 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; 447 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1152 --len;
1153 n = (n + 1) % 16; 448 n = (n + 1) % 16;
449 len--;
1154 } 450 }
1155 if (n == 0) 451 if (n > 0) {
1156 GCM_MUL(ctx, Xi);
1157 else {
1158 ctx->mres = n; 452 ctx->mres = n;
1159 return 0; 453 return 0;
1160 } 454 }
455 gcm_mul(ctx, ctx->Xi.u);
1161 } 456 }
1162#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 457 if ((i = (len & (size_t)-16)) > 0) {
1163 while (len >= GHASH_CHUNK) { 458 j = i / 16;
1164 (*stream)(in, out, GHASH_CHUNK/16, key, ctx->Yi.c); 459 stream(in, out, j, ctx->key, ctx->Yi.c);
1165 ctr += GHASH_CHUNK/16;
1166 ctx->Yi.d[3] = htobe32(ctr);
1167 GHASH(ctx, out, GHASH_CHUNK);
1168 out += GHASH_CHUNK;
1169 in += GHASH_CHUNK;
1170 len -= GHASH_CHUNK;
1171 }
1172#endif
1173 if ((i = (len & (size_t)-16))) {
1174 size_t j = i/16;
1175
1176 (*stream)(in, out, j, key, ctx->Yi.c);
1177 ctr += (unsigned int)j; 460 ctr += (unsigned int)j;
1178 ctx->Yi.d[3] = htobe32(ctr); 461 ctx->Yi.d[3] = htobe32(ctr);
462 gcm_ghash(ctx, out, i);
1179 in += i; 463 in += i;
1180 len -= i;
1181#if defined(GHASH)
1182 GHASH(ctx, out, i);
1183 out += i; 464 out += i;
1184#else 465 len -= i;
1185 while (j--) {
1186 for (i = 0; i < 16; ++i)
1187 ctx->Xi.c[i] ^= out[i];
1188 GCM_MUL(ctx, Xi);
1189 out += 16;
1190 }
1191#endif
1192 } 466 }
1193 if (len) { 467 if (len > 0) {
1194 (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key); 468 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
1195 ++ctr; 469 ctx->Yi.d[3] = htobe32(++ctr);
1196 ctx->Yi.d[3] = htobe32(ctr); 470 while (len-- > 0) {
1197 while (len--) {
1198 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; 471 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1199 ++n; 472 n++;
1200 } 473 }
1201 } 474 }
1202 475
1203 ctx->mres = n; 476 ctx->mres = n;
477
1204 return 0; 478 return 0;
1205} 479}
1206LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt_ctr32); 480LCRYPTO_ALIAS(CRYPTO_gcm128_encrypt_ctr32);
1207 481
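The stream argument follows the ctr128_f contract: encrypt a whole number of 16-byte blocks, advancing only the low 32 bits of the counter block big-endian, while the caller re-synchronizes the counter afterwards (ctr += j and the htobe32() store above). A portable sketch of that contract, with cipher_block() as a hypothetical stand-in for the real AES-NI/assembly routines:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical single-block cipher standing in for AES. */
void cipher_block(const uint8_t in[16], uint8_t out[16], const void *key);

static void
ctr32_stream_sketch(const uint8_t *in, uint8_t *out, size_t blocks,
    const void *key, const uint8_t ivec[16])
{
	uint8_t ctr[16], ks[16];
	uint32_t c;
	size_t i;

	memcpy(ctr, ivec, 16);
	while (blocks-- > 0) {
		cipher_block(ctr, ks, key);
		for (i = 0; i < 16; i++)
			out[i] = in[i] ^ ks[i];
		/* Advance only the low 32 bits, big endian. */
		c = (uint32_t)ctr[12] << 24 | (uint32_t)ctr[13] << 16 |
		    (uint32_t)ctr[14] << 8 | (uint32_t)ctr[15];
		c++;
		ctr[12] = (uint8_t)(c >> 24);
		ctr[13] = (uint8_t)(c >> 16);
		ctr[14] = (uint8_t)(c >> 8);
		ctr[15] = (uint8_t)c;
		in += 16;
		out += 16;
	}
}
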
1208int 482int
1209CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, 483CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const unsigned char *in,
1210 const unsigned char *in, unsigned char *out, 484 unsigned char *out, size_t len, ctr128_f stream)
1211 size_t len, ctr128_f stream)
1212{ 485{
1213 unsigned int n, ctr; 486 unsigned int n, ctr;
1214 size_t i; 487 uint64_t mlen;
1215 u64 mlen = ctx->len.u[1]; 488 size_t i, j;
1216 void *key = ctx->key; 489 uint8_t c;
1217#ifdef GCM_FUNCREF_4BIT
1218 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1219# ifdef GHASH
1220 void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1221 const u8 *inp, size_t len) = ctx->ghash;
1222# endif
1223#endif
1224 490
1225 mlen += len; 491 mlen = ctx->len.u[1] + len;
1226 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 492 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1227 return -1; 493 return -1;
1228 ctx->len.u[1] = mlen; 494 ctx->len.u[1] = mlen;
1229 495
1230 if (ctx->ares) { 496 if (ctx->ares > 0) {
1231 /* First call to decrypt finalizes GHASH(AAD) */ 497 /* First call to decrypt finalizes GHASH(AAD) */
1232 GCM_MUL(ctx, Xi); 498 gcm_mul(ctx, ctx->Xi.u);
1233 ctx->ares = 0; 499 ctx->ares = 0;
1234 } 500 }
1235 501
1236 ctr = be32toh(ctx->Yi.d[3]); 502 ctr = be32toh(ctx->Yi.d[3]);
1237 503
1238 n = ctx->mres; 504 if ((n = ctx->mres) > 0) {
1239 if (n) { 505 while (n > 0 && len > 0) {
1240 while (n && len) { 506 c = *(in++);
1241 u8 c = *(in++);
1242 *(out++) = c ^ ctx->EKi.c[n]; 507 *(out++) = c ^ ctx->EKi.c[n];
1243 ctx->Xi.c[n] ^= c; 508 ctx->Xi.c[n] ^= c;
1244 --len;
1245 n = (n + 1) % 16; 509 n = (n + 1) % 16;
510 len--;
1246 } 511 }
1247 if (n == 0) 512 if (n > 0) {
1248 GCM_MUL(ctx, Xi);
1249 else {
1250 ctx->mres = n; 513 ctx->mres = n;
1251 return 0; 514 return 0;
1252 } 515 }
516 gcm_mul(ctx, ctx->Xi.u);
1253 } 517 }
1254#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 518 if ((i = (len & (size_t)-16)) > 0) {
1255 while (len >= GHASH_CHUNK) { 519 j = i / 16;
1256 GHASH(ctx, in, GHASH_CHUNK); 520 gcm_ghash(ctx, in, i);
1257 (*stream)(in, out, GHASH_CHUNK/16, key, ctx->Yi.c); 521 stream(in, out, j, ctx->key, ctx->Yi.c);
1258 ctr += GHASH_CHUNK/16;
1259 ctx->Yi.d[3] = htobe32(ctr);
1260 out += GHASH_CHUNK;
1261 in += GHASH_CHUNK;
1262 len -= GHASH_CHUNK;
1263 }
1264#endif
1265 if ((i = (len & (size_t)-16))) {
1266 size_t j = i/16;
1267
1268#if defined(GHASH)
1269 GHASH(ctx, in, i);
1270#else
1271 while (j--) {
1272 size_t k;
1273 for (k = 0; k < 16; ++k)
1274 ctx->Xi.c[k] ^= in[k];
1275 GCM_MUL(ctx, Xi);
1276 in += 16;
1277 }
1278 j = i/16;
1279 in -= i;
1280#endif
1281 (*stream)(in, out, j, key, ctx->Yi.c);
1282 ctr += (unsigned int)j; 522 ctr += (unsigned int)j;
1283 ctx->Yi.d[3] = htobe32(ctr); 523 ctx->Yi.d[3] = htobe32(ctr);
1284 out += i;
1285 in += i; 524 in += i;
525 out += i;
1286 len -= i; 526 len -= i;
1287 } 527 }
1288 if (len) { 528 if (len > 0) {
1289 (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key); 529 ctx->block(ctx->Yi.c, ctx->EKi.c, ctx->key);
1290 ++ctr; 530 ctx->Yi.d[3] = htobe32(++ctr);
1291 ctx->Yi.d[3] = htobe32(ctr); 531 while (len-- > 0) {
1292 while (len--) { 532 c = in[n];
1293 u8 c = in[n];
1294 ctx->Xi.c[n] ^= c; 533 ctx->Xi.c[n] ^= c;
1295 out[n] = c ^ ctx->EKi.c[n]; 534 out[n] = c ^ ctx->EKi.c[n];
1296 ++n; 535 n++;
1297 } 536 }
1298 } 537 }
1299 538
1300 ctx->mres = n; 539 ctx->mres = n;
540
1301 return 0; 541 return 0;
1302} 542}
1303LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt_ctr32); 543LCRYPTO_ALIAS(CRYPTO_gcm128_decrypt_ctr32);
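Note the mirrored ordering between the two ctr32 paths: encryption runs the keystream first and GHASHes the freshly written ciphertext, while decryption GHASHes the incoming ciphertext before the stream call, which also keeps in-place operation (in == out) safe. A sketch of the bulk step, reusing the hypothetical helpers from the sketches above:

#include <stddef.h>
#include <stdint.h>

/* Hypothetical helpers, as in the earlier sketches. */
void ghash_blocks(uint8_t Xi[16], const uint8_t *in, size_t len);
void ctr32_stream(const uint8_t *in, uint8_t *out, size_t blocks,
    const void *key, uint8_t ivec[16]);

static void
gcm_ctr32_bulk_sketch(uint8_t Xi[16], uint8_t Yi[16], const void *key,
    const uint8_t *in, uint8_t *out, size_t blocks, int decrypting)
{
	if (decrypting)		/* absorb ciphertext before it can be
				 * overwritten in the in == out case */
		ghash_blocks(Xi, in, blocks * 16);
	ctr32_stream(in, out, blocks, key, Yi);
	if (!decrypting)	/* ciphertext is the stream output */
		ghash_blocks(Xi, out, blocks * 16);
}
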
@@ -1306,26 +546,25 @@ int
1306CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, 546CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1307 size_t len) 547 size_t len)
1308{ 548{
1309 u64 alen = ctx->len.u[0] << 3; 549 uint64_t alen, clen;
1310 u64 clen = ctx->len.u[1] << 3; 550
1311#ifdef GCM_FUNCREF_4BIT 551 alen = ctx->len.u[0] << 3;
1312 void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 552 clen = ctx->len.u[1] << 3;
1313#endif
1314 553
1315 if (ctx->mres || ctx->ares) 554 if (ctx->ares > 0 || ctx->mres > 0)
1316 GCM_MUL(ctx, Xi); 555 gcm_mul(ctx, ctx->Xi.u);
1317 556
1318 ctx->Xi.u[0] ^= htobe64(alen); 557 ctx->Xi.u[0] ^= htobe64(alen);
1319 ctx->Xi.u[1] ^= htobe64(clen); 558 ctx->Xi.u[1] ^= htobe64(clen);
1320 GCM_MUL(ctx, Xi); 559 gcm_mul(ctx, ctx->Xi.u);
1321 560
1322 ctx->Xi.u[0] ^= ctx->EK0.u[0]; 561 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1323 ctx->Xi.u[1] ^= ctx->EK0.u[1]; 562 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1324 563
1325 if (tag && len <= sizeof(ctx->Xi)) 564 if (tag == NULL || len > sizeof(ctx->Xi))
1326 return memcmp(ctx->Xi.c, tag, len);
1327 else
1328 return -1; 565 return -1;
566
567 return timingsafe_memcmp(ctx->Xi.c, tag, len);
1329} 568}
1330LCRYPTO_ALIAS(CRYPTO_gcm128_finish); 569LCRYPTO_ALIAS(CRYPTO_gcm128_finish);
1331 570
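The tag check now rejects a NULL or oversized tag up front and goes through timingsafe_memcmp(), so verification cannot leak the length of a matching prefix through an early exit. The core idea, sketched (the libc routine additionally provides memcmp-style ordering):

#include <stddef.h>
#include <stdint.h>

/*
 * Constant-time tag comparison in the spirit of timingsafe_memcmp();
 * a sketch of the idea, not the libc implementation.
 * Returns 0 iff the tags match.
 */
static int
gcm_tag_neq_sketch(const uint8_t *a, const uint8_t *b, size_t len)
{
	uint8_t acc = 0;
	size_t i;

	for (i = 0; i < len; i++)
		acc |= a[i] ^ b[i];	/* no data-dependent early exit */

	return acc != 0;
}
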
@@ -1333,26 +572,10 @@ void
1333CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) 572CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1334{ 573{
1335 CRYPTO_gcm128_finish(ctx, NULL, 0); 574 CRYPTO_gcm128_finish(ctx, NULL, 0);
1336 memcpy(tag, ctx->Xi.c,
1337 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1338}
1339LCRYPTO_ALIAS(CRYPTO_gcm128_tag);
1340
1341GCM128_CONTEXT *
1342CRYPTO_gcm128_new(void *key, block128_f block)
1343{
1344 GCM128_CONTEXT *ret;
1345 575
1346 if ((ret = malloc(sizeof(GCM128_CONTEXT)))) 576 if (len > sizeof(ctx->Xi.c))
1347 CRYPTO_gcm128_init(ret, key, block); 577 len = sizeof(ctx->Xi.c);
1348 578
1349 return ret; 579 memcpy(tag, ctx->Xi.c, len);
1350} 580}
1351LCRYPTO_ALIAS(CRYPTO_gcm128_new); 581LCRYPTO_ALIAS(CRYPTO_gcm128_tag);
1352
1353void
1354CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1355{
1356 freezero(ctx, sizeof(*ctx));
1357}
1358LCRYPTO_ALIAS(CRYPTO_gcm128_release);
diff --git a/src/lib/libcrypto/modes/gcm128_amd64.c b/src/lib/libcrypto/modes/gcm128_amd64.c
new file mode 100644
index 0000000000..eaa66fb32f
--- /dev/null
+++ b/src/lib/libcrypto/modes/gcm128_amd64.c
@@ -0,0 +1,44 @@
1/* $OpenBSD: gcm128_amd64.c,v 1.1 2025/06/28 12:39:10 jsing Exp $ */
2/*
3 * Copyright (c) 2025 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include "crypto_arch.h"
19#include "modes_local.h"
20
21void gcm_init_4bit(u128 Htable[16], uint64_t H[2]);
22void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
23void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
24 size_t len);
25
26void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
27void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
28void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
29 size_t len);
30
31void
32gcm128_init(GCM128_CONTEXT *ctx)
33{
34 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_CLMUL) != 0) {
35 gcm_init_clmul(ctx->Htable, ctx->H.u);
36 ctx->gmult = gcm_gmult_clmul;
37 ctx->ghash = gcm_ghash_clmul;
38 return;
39 }
40
41 gcm_init_4bit(ctx->Htable, ctx->H.u);
42 ctx->gmult = gcm_gmult_4bit;
43 ctx->ghash = gcm_ghash_4bit;
44}
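gcm128_init() is the per-architecture hook that selects a gmult/ghash pair once per context, replacing the old compile-time GCM_FUNCREF_4BIT indirection. A sketch of how it presumably slots into CRYPTO_gcm128_init() (assumed shape for illustration, not the exact tree code):

#include <endian.h>
#include <string.h>

#include "modes_local.h"

void gcm128_init(GCM128_CONTEXT *ctx);	/* the per-arch hook above */

void
CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
	memset(ctx, 0, sizeof(*ctx));
	ctx->block = block;
	ctx->key = key;

	/* H = E_K(0^128), stored host endian for table construction. */
	(*block)(ctx->H.c, ctx->H.c, key);
	ctx->H.u[0] = be64toh(ctx->H.u[0]);
	ctx->H.u[1] = be64toh(ctx->H.u[1]);

	gcm128_init(ctx);	/* select gmult/ghash for this CPU */
}
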
diff --git a/src/lib/libcrypto/modes/gcm128_i386.c b/src/lib/libcrypto/modes/gcm128_i386.c
new file mode 100644
index 0000000000..ac517fdb04
--- /dev/null
+++ b/src/lib/libcrypto/modes/gcm128_i386.c
@@ -0,0 +1,56 @@
1/* $OpenBSD: gcm128_i386.c,v 1.1 2025/06/28 12:39:10 jsing Exp $ */
2/*
3 * Copyright (c) 2025 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include "crypto_arch.h"
19#include "modes_local.h"
20
21void gcm_init_4bit(u128 Htable[16], uint64_t H[2]);
22
23void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
24void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
25 size_t len);
26
27void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
28void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
29 size_t len);
30
31void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
32void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
33void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
34 size_t len);
35
36void
37gcm128_init(GCM128_CONTEXT *ctx)
38{
39 if ((crypto_cpu_caps_i386 & CRYPTO_CPU_CAPS_I386_CLMUL) != 0) {
40 gcm_init_clmul(ctx->Htable, ctx->H.u);
41 ctx->gmult = gcm_gmult_clmul;
42 ctx->ghash = gcm_ghash_clmul;
43 return;
44 }
45
46 if ((crypto_cpu_caps_i386 & CRYPTO_CPU_CAPS_I386_MMX) != 0) {
47 gcm_init_4bit(ctx->Htable, ctx->H.u);
48 ctx->gmult = gcm_gmult_4bit_mmx;
49 ctx->ghash = gcm_ghash_4bit_mmx;
50 return;
51 }
52
53 gcm_init_4bit(ctx->Htable, ctx->H.u);
54 ctx->gmult = gcm_gmult_4bit_x86;
55 ctx->ghash = gcm_ghash_4bit_x86;
56}
diff --git a/src/lib/libcrypto/modes/modes_local.h b/src/lib/libcrypto/modes/modes_local.h
index 511855f2e0..5c1acfc25f 100644
--- a/src/lib/libcrypto/modes/modes_local.h
+++ b/src/lib/libcrypto/modes/modes_local.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: modes_local.h,v 1.2 2023/07/08 14:55:36 beck Exp $ */ 1/* $OpenBSD: modes_local.h,v 1.7 2025/07/13 06:01:33 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -15,105 +15,47 @@
15__BEGIN_HIDDEN_DECLS 15__BEGIN_HIDDEN_DECLS
16 16
17#if defined(_LP64) 17#if defined(_LP64)
18typedef long i64;
19typedef unsigned long u64;
20#define U64(C) C##UL 18#define U64(C) C##UL
21#else 19#else
22typedef long long i64;
23typedef unsigned long long u64;
24#define U64(C) C##ULL 20#define U64(C) C##ULL
25#endif 21#endif
26 22
27typedef unsigned int u32;
28typedef unsigned char u8;
29
30#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
31#if defined(__GNUC__) && __GNUC__>=2
32# if defined(__x86_64) || defined(__x86_64__)
33# define BSWAP8(x) ({ u64 ret=(x); \
34 asm ("bswapq %0" \
35 : "+r"(ret)); ret; })
36# define BSWAP4(x) ({ u32 ret=(x); \
37 asm ("bswapl %0" \
38 : "+r"(ret)); ret; })
39# elif (defined(__i386) || defined(__i386__))
40# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
41 asm ("bswapl %0; bswapl %1" \
42 : "+r"(hi),"+r"(lo)); \
43 (u64)hi<<32|lo; })
44# define BSWAP4(x) ({ u32 ret=(x); \
45 asm ("bswapl %0" \
46 : "+r"(ret)); ret; })
47# elif (defined(__arm__) || defined(__arm)) && !defined(__STRICT_ALIGNMENT)
48# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
49 asm ("rev %0,%0; rev %1,%1" \
50 : "+r"(hi),"+r"(lo)); \
51 (u64)hi<<32|lo; })
52# define BSWAP4(x) ({ u32 ret; \
53 asm ("rev %0,%1" \
54 : "=r"(ret) : "r"((u32)(x))); \
55 ret; })
56# endif
57#endif
58#endif
59
60#if defined(BSWAP4) && !defined(__STRICT_ALIGNMENT)
61#define GETU32(p) BSWAP4(*(const u32 *)(p))
62#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
63#else
64#define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3])
65#define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v))
66#endif
67
68/* GCM definitions */ 23/* GCM definitions */
69 24
70typedef struct { 25typedef struct {
71 u64 hi, lo; 26 uint64_t hi, lo;
72} u128; 27} u128;
73 28
74#ifdef TABLE_BITS
75#undef TABLE_BITS
76#endif
77/*
78 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
79 * never be set to 8 [or 1]. For further information see gcm128.c.
80 */
81#define TABLE_BITS 4
82
83struct gcm128_context { 29struct gcm128_context {
84 /* Following 6 names follow names in GCM specification */ 30 /* Following 6 names follow names in GCM specification */
85 union { 31 union {
86 u64 u[2]; 32 uint64_t u[2];
87 u32 d[4]; 33 uint32_t d[4];
88 u8 c[16]; 34 uint8_t c[16];
89 size_t t[16/sizeof(size_t)]; 35 size_t t[16/sizeof(size_t)];
90 } Yi, EKi, EK0, len, Xi, H; 36 } Yi, EKi, EK0, len, Xi, H;
91 /* Relative position of Xi, H and pre-computed Htable is used 37 /* Relative position of Xi, H and pre-computed Htable is used
92 * in some assembler modules, i.e. don't change the order! */ 38 * in some assembler modules, i.e. don't change the order! */
93#if TABLE_BITS==8
94 u128 Htable[256];
95#else
96 u128 Htable[16]; 39 u128 Htable[16];
97 void (*gmult)(u64 Xi[2], const u128 Htable[16]); 40 void (*gmult)(uint64_t Xi[2], const u128 Htable[16]);
98 void (*ghash)(u64 Xi[2], const u128 Htable[16], const u8 *inp, 41 void (*ghash)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
99 size_t len); 42 size_t len);
100#endif
101 unsigned int mres, ares; 43 unsigned int mres, ares;
102 block128_f block; 44 block128_f block;
103 void *key; 45 void *key;
104}; 46};
105 47
106struct xts128_context { 48struct xts128_context {
107 void *key1, *key2; 49 const void *key1, *key2;
108 block128_f block1, block2; 50 block128_f block1, block2;
109}; 51};
110 52
111struct ccm128_context { 53struct ccm128_context {
112 union { 54 union {
113 u64 u[2]; 55 uint64_t u[2];
114 u8 c[16]; 56 uint8_t c[16];
115 } nonce, cmac; 57 } nonce, cmac;
116 u64 blocks; 58 uint64_t blocks;
117 block128_f block; 59 block128_f block;
118 void *key; 60 void *key;
119}; 61};
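The comment above the Htable member is load-bearing: the assembly gmult/ghash routines address Htable at a fixed offset from Xi and H, so the field order is effectively ABI. A hypothetical compile-time guard against accidental reordering (not in the tree):

#include <assert.h>
#include <stddef.h>

#include "modes_local.h"

/* Xi (16 bytes) and H (16 bytes) must immediately precede Htable. */
static_assert(offsetof(struct gcm128_context, Htable) ==
    offsetof(struct gcm128_context, Xi) + 32,
    "gcm128_context layout is relied upon by assembly GHASH");
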
diff --git a/src/lib/libcrypto/modes/ofb128.c b/src/lib/libcrypto/modes/ofb128.c
index 42afd29d58..8440e7f583 100644
--- a/src/lib/libcrypto/modes/ofb128.c
+++ b/src/lib/libcrypto/modes/ofb128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: ofb128.c,v 1.7 2023/07/08 14:56:54 beck Exp $ */ 1/* $OpenBSD: ofb128.c,v 1.10 2025/04/23 10:09:08 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -49,15 +49,11 @@
49 * 49 *
50 */ 50 */
51 51
52#include <openssl/crypto.h>
53#include "modes_local.h"
54#include <string.h> 52#include <string.h>
55 53
56#ifndef MODES_DEBUG 54#include <openssl/crypto.h>
57# ifndef NDEBUG 55
58# define NDEBUG 56#include "modes_local.h"
59# endif
60#endif
61 57
62/* The input and output encrypted as though 128bit ofb mode is being 58/* The input and output encrypted as though 128bit ofb mode is being
63 * used. The extra state information to record how much of the 59 * used. The extra state information to record how much of the
@@ -74,7 +70,6 @@ CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
74 70
75 n = *num; 71 n = *num;
76 72
77#if !defined(OPENSSL_SMALL_FOOTPRINT)
78 if (16 % sizeof(size_t) == 0) 73 if (16 % sizeof(size_t) == 0)
79 do { /* always true actually */ 74 do { /* always true actually */
80 while (n && len) { 75 while (n && len) {
@@ -109,7 +104,6 @@ CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
109 return; 104 return;
110 } while (0); 105 } while (0);
111 /* the rest would be commonly eliminated by x86* compiler */ 106 /* the rest would be commonly eliminated by x86* compiler */
112#endif
113 while (l < len) { 107 while (l < len) {
114 if (n == 0) { 108 if (n == 0) {
115 (*block)(ivec, ivec, key); 109 (*block)(ivec, ivec, key);
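For reference, the OFB feedback this file implements: the IV block is repeatedly encrypted to form the keystream, so the block cipher only ever runs in the forward direction for both encryption and decryption. One full-block step as a self-contained sketch, with cipher_block() again a hypothetical stand-in:

#include <stdint.h>

void cipher_block(const uint8_t in[16], uint8_t out[16], const void *key);

static void
ofb128_block_sketch(uint8_t ivec[16], const uint8_t in[16], uint8_t out[16],
    const void *key)
{
	unsigned int n;

	cipher_block(ivec, ivec, key);	/* ivec = E_K(ivec): next keystream */
	for (n = 0; n < 16; n++)
		out[n] = in[n] ^ ivec[n];
}
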
diff --git a/src/lib/libcrypto/modes/xts128.c b/src/lib/libcrypto/modes/xts128.c
index 7516acf850..9c863e73d6 100644
--- a/src/lib/libcrypto/modes/xts128.c
+++ b/src/lib/libcrypto/modes/xts128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: xts128.c,v 1.12 2023/07/08 14:56:54 beck Exp $ */ 1/* $OpenBSD: xts128.c,v 1.15 2025/05/18 09:05:59 jsing Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -48,17 +48,12 @@
48 * ==================================================================== 48 * ====================================================================
49 */ 49 */
50 50
51#include <openssl/crypto.h>
52#include "modes_local.h"
53
54#include <endian.h> 51#include <endian.h>
55#include <string.h> 52#include <string.h>
56 53
57#ifndef MODES_DEBUG 54#include <openssl/crypto.h>
58# ifndef NDEBUG 55
59# define NDEBUG 56#include "modes_local.h"
60# endif
61#endif
62 57
63int 58int
64CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16], 59CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
@@ -66,9 +61,9 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
66 size_t len, int enc) 61 size_t len, int enc)
67{ 62{
68 union { 63 union {
69 u64 u[2]; 64 uint64_t u[2];
70 u32 d[4]; 65 uint32_t d[4];
71 u8 c[16]; 66 uint8_t c[16];
72 } tweak, scratch; 67 } tweak, scratch;
73 unsigned int i; 68 unsigned int i;
74 69
@@ -88,8 +83,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
88 scratch.u[0] ^= tweak.u[0]; 83 scratch.u[0] ^= tweak.u[0];
89 scratch.u[1] ^= tweak.u[1]; 84 scratch.u[1] ^= tweak.u[1];
90#else 85#else
91 scratch.u[0] = ((u64 *)inp)[0] ^ tweak.u[0]; 86 scratch.u[0] = ((uint64_t *)inp)[0] ^ tweak.u[0];
92 scratch.u[1] = ((u64 *)inp)[1] ^ tweak.u[1]; 87 scratch.u[1] = ((uint64_t *)inp)[1] ^ tweak.u[1];
93#endif 88#endif
94 (*ctx->block1)(scratch.c, scratch.c, ctx->key1); 89 (*ctx->block1)(scratch.c, scratch.c, ctx->key1);
95#ifdef __STRICT_ALIGNMENT 90#ifdef __STRICT_ALIGNMENT
@@ -97,8 +92,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
97 scratch.u[1] ^= tweak.u[1]; 92 scratch.u[1] ^= tweak.u[1];
98 memcpy(out, scratch.c, 16); 93 memcpy(out, scratch.c, 16);
99#else 94#else
100 ((u64 *)out)[0] = scratch.u[0] ^= tweak.u[0]; 95 ((uint64_t *)out)[0] = scratch.u[0] ^= tweak.u[0];
101 ((u64 *)out)[1] = scratch.u[1] ^= tweak.u[1]; 96 ((uint64_t *)out)[1] = scratch.u[1] ^= tweak.u[1];
102#endif 97#endif
103 inp += 16; 98 inp += 16;
104 out += 16; 99 out += 16;
@@ -120,15 +115,15 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
120 for (c = 0, i = 0; i < 16; ++i) { 115 for (c = 0, i = 0; i < 16; ++i) {
121 /*+ substitutes for |, because c is 1 bit */ 116 /*+ substitutes for |, because c is 1 bit */
122 c += ((size_t)tweak.c[i]) << 1; 117 c += ((size_t)tweak.c[i]) << 1;
123 tweak.c[i] = (u8)c; 118 tweak.c[i] = (uint8_t)c;
124 c = c >> 8; 119 c = c >> 8;
125 } 120 }
126 tweak.c[0] ^= (u8)(0x87 & (0 - c)); 121 tweak.c[0] ^= (uint8_t)(0x87 & (0 - c));
127#endif 122#endif
128 } 123 }
129 if (enc) { 124 if (enc) {
130 for (i = 0; i < len; ++i) { 125 for (i = 0; i < len; ++i) {
131 u8 ch = inp[i]; 126 uint8_t ch = inp[i];
132 out[i] = scratch.c[i]; 127 out[i] = scratch.c[i];
133 scratch.c[i] = ch; 128 scratch.c[i] = ch;
134 } 129 }
@@ -140,8 +135,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
140 memcpy(out - 16, scratch.c, 16); 135 memcpy(out - 16, scratch.c, 16);
141 } else { 136 } else {
142 union { 137 union {
143 u64 u[2]; 138 uint64_t u[2];
144 u8 c[16]; 139 uint8_t c[16];
145 } tweak1; 140 } tweak1;
146 141
147#if BYTE_ORDER == LITTLE_ENDIAN 142#if BYTE_ORDER == LITTLE_ENDIAN
@@ -157,25 +152,25 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
157 for (c = 0, i = 0; i < 16; ++i) { 152 for (c = 0, i = 0; i < 16; ++i) {
158 /*+ substitutes for |, because c is 1 bit */ 153 /*+ substitutes for |, because c is 1 bit */
159 c += ((size_t)tweak.c[i]) << 1; 154 c += ((size_t)tweak.c[i]) << 1;
160 tweak1.c[i] = (u8)c; 155 tweak1.c[i] = (uint8_t)c;
161 c = c >> 8; 156 c = c >> 8;
162 } 157 }
163 tweak1.c[0] ^= (u8)(0x87 & (0 - c)); 158 tweak1.c[0] ^= (uint8_t)(0x87 & (0 - c));
164#endif 159#endif
165#ifdef __STRICT_ALIGNMENT 160#ifdef __STRICT_ALIGNMENT
166 memcpy(scratch.c, inp, 16); 161 memcpy(scratch.c, inp, 16);
167 scratch.u[0] ^= tweak1.u[0]; 162 scratch.u[0] ^= tweak1.u[0];
168 scratch.u[1] ^= tweak1.u[1]; 163 scratch.u[1] ^= tweak1.u[1];
169#else 164#else
170 scratch.u[0] = ((u64 *)inp)[0] ^ tweak1.u[0]; 165 scratch.u[0] = ((uint64_t *)inp)[0] ^ tweak1.u[0];
171 scratch.u[1] = ((u64 *)inp)[1] ^ tweak1.u[1]; 166 scratch.u[1] = ((uint64_t *)inp)[1] ^ tweak1.u[1];
172#endif 167#endif
173 (*ctx->block1)(scratch.c, scratch.c, ctx->key1); 168 (*ctx->block1)(scratch.c, scratch.c, ctx->key1);
174 scratch.u[0] ^= tweak1.u[0]; 169 scratch.u[0] ^= tweak1.u[0];
175 scratch.u[1] ^= tweak1.u[1]; 170 scratch.u[1] ^= tweak1.u[1];
176 171
177 for (i = 0; i < len; ++i) { 172 for (i = 0; i < len; ++i) {
178 u8 ch = inp[16 + i]; 173 uint8_t ch = inp[16 + i];
179 out[16 + i] = scratch.c[i]; 174 out[16 + i] = scratch.c[i];
180 scratch.c[i] = ch; 175 scratch.c[i] = ch;
181 } 176 }
@@ -187,8 +182,8 @@ CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
187 scratch.u[1] ^= tweak.u[1]; 182 scratch.u[1] ^= tweak.u[1];
188 memcpy(out, scratch.c, 16); 183 memcpy(out, scratch.c, 16);
189#else 184#else
190 ((u64 *)out)[0] = scratch.u[0] ^ tweak.u[0]; 185 ((uint64_t *)out)[0] = scratch.u[0] ^ tweak.u[0];
191 ((u64 *)out)[1] = scratch.u[1] ^ tweak.u[1]; 186 ((uint64_t *)out)[1] = scratch.u[1] ^ tweak.u[1];
192#endif 187#endif
193 } 188 }
194 189
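The carry loop in the tweak update above is multiplication by x in GF(2^128) under the XTS reduction polynomial x^128 + x^7 + x^2 + x + 1 (hence the 0x87 constant), with tweak bytes stored least-significant first. The same update as a standalone sketch with hypothetical naming:

#include <stdint.h>

static void
xts_tweak_double_sketch(uint8_t t[16])
{
	unsigned int c = 0, cc, i;

	/* Shift left by one bit, carrying from low bytes to high. */
	for (i = 0; i < 16; i++) {
		cc = t[i] >> 7;
		t[i] = (uint8_t)(t[i] << 1 | c);
		c = cc;
	}
	/* Reduce if a bit fell off the top of the 128-bit value. */
	t[0] ^= (uint8_t)(0x87 & (0 - c));
}
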