diff options
Diffstat (limited to 'src/lib/libcrypto/modes/gcm128.c')
| -rw-r--r-- | src/lib/libcrypto/modes/gcm128.c | 1187 |
1 files changed, 627 insertions, 560 deletions
diff --git a/src/lib/libcrypto/modes/gcm128.c b/src/lib/libcrypto/modes/gcm128.c index 36aac413c3..45d33db768 100644 --- a/src/lib/libcrypto/modes/gcm128.c +++ b/src/lib/libcrypto/modes/gcm128.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: gcm128.c,v 1.23 2022/11/26 16:08:53 tb Exp $ */ | 1 | /* $OpenBSD: gcm128.c,v 1.24 2023/07/08 14:55:36 beck Exp $ */ |
| 2 | /* ==================================================================== | 2 | /* ==================================================================== |
| 3 | * Copyright (c) 2010 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 2010 The OpenSSL Project. All rights reserved. |
| 4 | * | 4 | * |
| @@ -69,17 +69,17 @@ | |||
| 69 | #endif | 69 | #endif |
| 70 | 70 | ||
| 71 | #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) | 71 | #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) |
| 72 | #define REDUCE1BIT(V) \ | 72 | #define REDUCE1BIT(V) \ |
| 73 | do { \ | 73 | do { \ |
| 74 | if (sizeof(size_t)==8) { \ | 74 | if (sizeof(size_t)==8) { \ |
| 75 | u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ | 75 | u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ |
| 76 | V.lo = (V.hi<<63)|(V.lo>>1); \ | 76 | V.lo = (V.hi<<63)|(V.lo>>1); \ |
| 77 | V.hi = (V.hi>>1 )^T; \ | 77 | V.hi = (V.hi>>1 )^T; \ |
| 78 | } else { \ | 78 | } else { \ |
| 79 | u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ | 79 | u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ |
| 80 | V.lo = (V.hi<<63)|(V.lo>>1); \ | 80 | V.lo = (V.hi<<63)|(V.lo>>1); \ |
| 81 | V.hi = (V.hi>>1 )^((u64)T<<32); \ | 81 | V.hi = (V.hi>>1 )^((u64)T<<32); \ |
| 82 | } \ | 82 | } \ |
| 83 | } while(0) | 83 | } while(0) |
| 84 | 84 | ||
| 85 | /* | 85 | /* |
| @@ -118,7 +118,8 @@ | |||
| 118 | */ | 118 | */ |
| 119 | #if TABLE_BITS==8 | 119 | #if TABLE_BITS==8 |
| 120 | 120 | ||
| 121 | static void gcm_init_8bit(u128 Htable[256], u64 H[2]) | 121 | static void |
| 122 | gcm_init_8bit(u128 Htable[256], u64 H[2]) | ||
| 122 | { | 123 | { |
| 123 | int i, j; | 124 | int i, j; |
| 124 | u128 V; | 125 | u128 V; |
| @@ -128,24 +129,25 @@ static void gcm_init_8bit(u128 Htable[256], u64 H[2]) | |||
| 128 | V.hi = H[0]; | 129 | V.hi = H[0]; |
| 129 | V.lo = H[1]; | 130 | V.lo = H[1]; |
| 130 | 131 | ||
| 131 | for (Htable[128]=V, i=64; i>0; i>>=1) { | 132 | for (Htable[128] = V, i = 64; i > 0; i >>= 1) { |
| 132 | REDUCE1BIT(V); | 133 | REDUCE1BIT(V); |
| 133 | Htable[i] = V; | 134 | Htable[i] = V; |
| 134 | } | 135 | } |
| 135 | 136 | ||
| 136 | for (i=2; i<256; i<<=1) { | 137 | for (i = 2; i < 256; i <<= 1) { |
| 137 | u128 *Hi = Htable+i, H0 = *Hi; | 138 | u128 *Hi = Htable + i, H0 = *Hi; |
| 138 | for (j=1; j<i; ++j) { | 139 | for (j = 1; j < i; ++j) { |
| 139 | Hi[j].hi = H0.hi^Htable[j].hi; | 140 | Hi[j].hi = H0.hi ^ Htable[j].hi; |
| 140 | Hi[j].lo = H0.lo^Htable[j].lo; | 141 | Hi[j].lo = H0.lo ^ Htable[j].lo; |
| 141 | } | 142 | } |
| 142 | } | 143 | } |
| 143 | } | 144 | } |
| 144 | 145 | ||
| 145 | static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) | 146 | static void |
| 147 | gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) | ||
| 146 | { | 148 | { |
| 147 | u128 Z = { 0, 0}; | 149 | u128 Z = { 0, 0}; |
| 148 | const u8 *xi = (const u8 *)Xi+15; | 150 | const u8 *xi = (const u8 *)Xi + 15; |
| 149 | size_t rem, n = *xi; | 151 | size_t rem, n = *xi; |
| 150 | static const size_t rem_8bit[256] = { | 152 | static const size_t rem_8bit[256] = { |
| 151 | PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246), | 153 | PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246), |
| @@ -217,17 +219,18 @@ static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) | |||
| 217 | Z.hi ^= Htable[n].hi; | 219 | Z.hi ^= Htable[n].hi; |
| 218 | Z.lo ^= Htable[n].lo; | 220 | Z.lo ^= Htable[n].lo; |
| 219 | 221 | ||
| 220 | if ((u8 *)Xi==xi) break; | 222 | if ((u8 *)Xi == xi) |
| 223 | break; | ||
| 221 | 224 | ||
| 222 | n = *(--xi); | 225 | n = *(--xi); |
| 223 | 226 | ||
| 224 | rem = (size_t)Z.lo&0xff; | 227 | rem = (size_t)Z.lo & 0xff; |
| 225 | Z.lo = (Z.hi<<56)|(Z.lo>>8); | 228 | Z.lo = (Z.hi << 56)|(Z.lo >> 8); |
| 226 | Z.hi = (Z.hi>>8); | 229 | Z.hi = (Z.hi >> 8); |
| 227 | #if SIZE_MAX == 0xffffffffffffffff | 230 | #if SIZE_MAX == 0xffffffffffffffff |
| 228 | Z.hi ^= rem_8bit[rem]; | 231 | Z.hi ^= rem_8bit[rem]; |
| 229 | #else | 232 | #else |
| 230 | Z.hi ^= (u64)rem_8bit[rem]<<32; | 233 | Z.hi ^= (u64)rem_8bit[rem] << 32; |
| 231 | #endif | 234 | #endif |
| 232 | } | 235 | } |
| 233 | 236 | ||
| @@ -238,10 +241,14 @@ static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) | |||
| 238 | #else | 241 | #else |
| 239 | u8 *p = (u8 *)Xi; | 242 | u8 *p = (u8 *)Xi; |
| 240 | u32 v; | 243 | u32 v; |
| 241 | v = (u32)(Z.hi>>32); PUTU32(p,v); | 244 | v = (u32)(Z.hi >> 32); |
| 242 | v = (u32)(Z.hi); PUTU32(p+4,v); | 245 | PUTU32(p, v); |
| 243 | v = (u32)(Z.lo>>32); PUTU32(p+8,v); | 246 | v = (u32)(Z.hi); |
| 244 | v = (u32)(Z.lo); PUTU32(p+12,v); | 247 | PUTU32(p + 4, v); |
| 248 | v = (u32)(Z.lo >> 32); | ||
| 249 | PUTU32(p + 8, v); | ||
| 250 | v = (u32)(Z.lo); | ||
| 251 | PUTU32(p + 12, v); | ||
| 245 | #endif | 252 | #endif |
| 246 | #else /* BIG_ENDIAN */ | 253 | #else /* BIG_ENDIAN */ |
| 247 | Xi[0] = Z.hi; | 254 | Xi[0] = Z.hi; |
| @@ -252,7 +259,8 @@ static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) | |||
| 252 | 259 | ||
| 253 | #elif TABLE_BITS==4 | 260 | #elif TABLE_BITS==4 |
| 254 | 261 | ||
| 255 | static void gcm_init_4bit(u128 Htable[16], u64 H[2]) | 262 | static void |
| 263 | gcm_init_4bit(u128 Htable[16], u64 H[2]) | ||
| 256 | { | 264 | { |
| 257 | u128 V; | 265 | u128 V; |
| 258 | #if defined(OPENSSL_SMALL_FOOTPRINT) | 266 | #if defined(OPENSSL_SMALL_FOOTPRINT) |
| @@ -265,17 +273,17 @@ static void gcm_init_4bit(u128 Htable[16], u64 H[2]) | |||
| 265 | V.lo = H[1]; | 273 | V.lo = H[1]; |
| 266 | 274 | ||
| 267 | #if defined(OPENSSL_SMALL_FOOTPRINT) | 275 | #if defined(OPENSSL_SMALL_FOOTPRINT) |
| 268 | for (Htable[8]=V, i=4; i>0; i>>=1) { | 276 | for (Htable[8] = V, i = 4; i > 0; i >>= 1) { |
| 269 | REDUCE1BIT(V); | 277 | REDUCE1BIT(V); |
| 270 | Htable[i] = V; | 278 | Htable[i] = V; |
| 271 | } | 279 | } |
| 272 | 280 | ||
| 273 | for (i=2; i<16; i<<=1) { | 281 | for (i = 2; i < 16; i <<= 1) { |
| 274 | u128 *Hi = Htable+i; | 282 | u128 *Hi = Htable + i; |
| 275 | int j; | 283 | int j; |
| 276 | for (V=*Hi, j=1; j<i; ++j) { | 284 | for (V = *Hi, j = 1; j < i; ++j) { |
| 277 | Hi[j].hi = V.hi^Htable[j].hi; | 285 | Hi[j].hi = V.hi ^ Htable[j].hi; |
| 278 | Hi[j].lo = V.lo^Htable[j].lo; | 286 | Hi[j].lo = V.lo ^ Htable[j].lo; |
| 279 | } | 287 | } |
| 280 | } | 288 | } |
| 281 | #else | 289 | #else |
| @@ -286,19 +294,25 @@ static void gcm_init_4bit(u128 Htable[16], u64 H[2]) | |||
| 286 | Htable[2] = V; | 294 | Htable[2] = V; |
| 287 | REDUCE1BIT(V); | 295 | REDUCE1BIT(V); |
| 288 | Htable[1] = V; | 296 | Htable[1] = V; |
| 289 | Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo; | 297 | Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo; |
| 290 | V=Htable[4]; | 298 | V = Htable[4]; |
| 291 | Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo; | 299 | Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo; |
| 292 | Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo; | 300 | Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo; |
| 293 | Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo; | 301 | Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo; |
| 294 | V=Htable[8]; | 302 | V = Htable[8]; |
| 295 | Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo; | 303 | Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo; |
| 296 | Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo; | 304 | Htable[10].hi = V.hi ^ Htable[2].hi, |
| 297 | Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo; | 305 | Htable[10].lo = V.lo ^ Htable[2].lo; |
| 298 | Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo; | 306 | Htable[11].hi = V.hi ^ Htable[3].hi, |
| 299 | Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo; | 307 | Htable[11].lo = V.lo ^ Htable[3].lo; |
| 300 | Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo; | 308 | Htable[12].hi = V.hi ^ Htable[4].hi, |
| 301 | Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo; | 309 | Htable[12].lo = V.lo ^ Htable[4].lo; |
| 310 | Htable[13].hi = V.hi ^ Htable[5].hi, | ||
| 311 | Htable[13].lo = V.lo ^ Htable[5].lo; | ||
| 312 | Htable[14].hi = V.hi ^ Htable[6].hi, | ||
| 313 | Htable[14].lo = V.lo ^ Htable[6].lo; | ||
| 314 | Htable[15].hi = V.hi ^ Htable[7].hi, | ||
| 315 | Htable[15].lo = V.lo ^ Htable[7].lo; | ||
| 302 | #endif | 316 | #endif |
| 303 | #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm)) | 317 | #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm)) |
| 304 | /* | 318 | /* |
| @@ -307,16 +321,16 @@ static void gcm_init_4bit(u128 Htable[16], u64 H[2]) | |||
| 307 | { | 321 | { |
| 308 | int j; | 322 | int j; |
| 309 | #if BYTE_ORDER == LITTLE_ENDIAN | 323 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 310 | for (j=0;j<16;++j) { | 324 | for (j = 0; j < 16; ++j) { |
| 311 | V = Htable[j]; | 325 | V = Htable[j]; |
| 312 | Htable[j].hi = V.lo; | 326 | Htable[j].hi = V.lo; |
| 313 | Htable[j].lo = V.hi; | 327 | Htable[j].lo = V.hi; |
| 314 | } | 328 | } |
| 315 | #else /* BIG_ENDIAN */ | 329 | #else /* BIG_ENDIAN */ |
| 316 | for (j=0;j<16;++j) { | 330 | for (j = 0; j < 16; ++j) { |
| 317 | V = Htable[j]; | 331 | V = Htable[j]; |
| 318 | Htable[j].hi = V.lo<<32|V.lo>>32; | 332 | Htable[j].hi = V.lo << 32|V.lo >> 32; |
| 319 | Htable[j].lo = V.hi<<32|V.hi>>32; | 333 | Htable[j].lo = V.hi << 32|V.hi >> 32; |
| 320 | } | 334 | } |
| 321 | #endif | 335 | #endif |
| 322 | } | 336 | } |
| @@ -330,44 +344,46 @@ static const size_t rem_4bit[16] = { | |||
| 330 | PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), | 344 | PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), |
| 331 | PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) }; | 345 | PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) }; |
| 332 | 346 | ||
| 333 | static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) | 347 | static void |
| 348 | gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) | ||
| 334 | { | 349 | { |
| 335 | u128 Z; | 350 | u128 Z; |
| 336 | int cnt = 15; | 351 | int cnt = 15; |
| 337 | size_t rem, nlo, nhi; | 352 | size_t rem, nlo, nhi; |
| 338 | 353 | ||
| 339 | nlo = ((const u8 *)Xi)[15]; | 354 | nlo = ((const u8 *)Xi)[15]; |
| 340 | nhi = nlo>>4; | 355 | nhi = nlo >> 4; |
| 341 | nlo &= 0xf; | 356 | nlo &= 0xf; |
| 342 | 357 | ||
| 343 | Z.hi = Htable[nlo].hi; | 358 | Z.hi = Htable[nlo].hi; |
| 344 | Z.lo = Htable[nlo].lo; | 359 | Z.lo = Htable[nlo].lo; |
| 345 | 360 | ||
| 346 | while (1) { | 361 | while (1) { |
| 347 | rem = (size_t)Z.lo&0xf; | 362 | rem = (size_t)Z.lo & 0xf; |
| 348 | Z.lo = (Z.hi<<60)|(Z.lo>>4); | 363 | Z.lo = (Z.hi << 60)|(Z.lo >> 4); |
| 349 | Z.hi = (Z.hi>>4); | 364 | Z.hi = (Z.hi >> 4); |
| 350 | #if SIZE_MAX == 0xffffffffffffffff | 365 | #if SIZE_MAX == 0xffffffffffffffff |
| 351 | Z.hi ^= rem_4bit[rem]; | 366 | Z.hi ^= rem_4bit[rem]; |
| 352 | #else | 367 | #else |
| 353 | Z.hi ^= (u64)rem_4bit[rem]<<32; | 368 | Z.hi ^= (u64)rem_4bit[rem] << 32; |
| 354 | #endif | 369 | #endif |
| 355 | Z.hi ^= Htable[nhi].hi; | 370 | Z.hi ^= Htable[nhi].hi; |
| 356 | Z.lo ^= Htable[nhi].lo; | 371 | Z.lo ^= Htable[nhi].lo; |
| 357 | 372 | ||
| 358 | if (--cnt<0) break; | 373 | if (--cnt < 0) |
| 374 | break; | ||
| 359 | 375 | ||
| 360 | nlo = ((const u8 *)Xi)[cnt]; | 376 | nlo = ((const u8 *)Xi)[cnt]; |
| 361 | nhi = nlo>>4; | 377 | nhi = nlo >> 4; |
| 362 | nlo &= 0xf; | 378 | nlo &= 0xf; |
| 363 | 379 | ||
| 364 | rem = (size_t)Z.lo&0xf; | 380 | rem = (size_t)Z.lo & 0xf; |
| 365 | Z.lo = (Z.hi<<60)|(Z.lo>>4); | 381 | Z.lo = (Z.hi << 60)|(Z.lo >> 4); |
| 366 | Z.hi = (Z.hi>>4); | 382 | Z.hi = (Z.hi >> 4); |
| 367 | #if SIZE_MAX == 0xffffffffffffffff | 383 | #if SIZE_MAX == 0xffffffffffffffff |
| 368 | Z.hi ^= rem_4bit[rem]; | 384 | Z.hi ^= rem_4bit[rem]; |
| 369 | #else | 385 | #else |
| 370 | Z.hi ^= (u64)rem_4bit[rem]<<32; | 386 | Z.hi ^= (u64)rem_4bit[rem] << 32; |
| 371 | #endif | 387 | #endif |
| 372 | Z.hi ^= Htable[nlo].hi; | 388 | Z.hi ^= Htable[nlo].hi; |
| 373 | Z.lo ^= Htable[nlo].lo; | 389 | Z.lo ^= Htable[nlo].lo; |
| @@ -380,10 +396,14 @@ static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) | |||
| 380 | #else | 396 | #else |
| 381 | u8 *p = (u8 *)Xi; | 397 | u8 *p = (u8 *)Xi; |
| 382 | u32 v; | 398 | u32 v; |
| 383 | v = (u32)(Z.hi>>32); PUTU32(p,v); | 399 | v = (u32)(Z.hi >> 32); |
| 384 | v = (u32)(Z.hi); PUTU32(p+4,v); | 400 | PUTU32(p, v); |
| 385 | v = (u32)(Z.lo>>32); PUTU32(p+8,v); | 401 | v = (u32)(Z.hi); |
| 386 | v = (u32)(Z.lo); PUTU32(p+12,v); | 402 | PUTU32(p + 4, v); |
| 403 | v = (u32)(Z.lo >> 32); | ||
| 404 | PUTU32(p + 8, v); | ||
| 405 | v = (u32)(Z.lo); | ||
| 406 | PUTU32(p + 12, v); | ||
| 387 | #endif | 407 | #endif |
| 388 | #else /* BIG_ENDIAN */ | 408 | #else /* BIG_ENDIAN */ |
| 389 | Xi[0] = Z.hi; | 409 | Xi[0] = Z.hi; |
| @@ -399,54 +419,56 @@ static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) | |||
| 399 | * mostly as reference and a placeholder for possible future | 419 | * mostly as reference and a placeholder for possible future |
| 400 | * non-trivial optimization[s]... | 420 | * non-trivial optimization[s]... |
| 401 | */ | 421 | */ |
| 402 | static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16], | 422 | static void |
| 403 | const u8 *inp,size_t len) | 423 | gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], |
| 424 | const u8 *inp, size_t len) | ||
| 404 | { | 425 | { |
| 405 | u128 Z; | 426 | u128 Z; |
| 406 | int cnt; | 427 | int cnt; |
| 407 | size_t rem, nlo, nhi; | 428 | size_t rem, nlo, nhi; |
| 408 | 429 | ||
| 409 | #if 1 | 430 | #if 1 |
| 410 | do { | 431 | do { |
| 411 | cnt = 15; | 432 | cnt = 15; |
| 412 | nlo = ((const u8 *)Xi)[15]; | 433 | nlo = ((const u8 *)Xi)[15]; |
| 413 | nlo ^= inp[15]; | 434 | nlo ^= inp[15]; |
| 414 | nhi = nlo>>4; | 435 | nhi = nlo >> 4; |
| 415 | nlo &= 0xf; | 436 | nlo &= 0xf; |
| 416 | 437 | ||
| 417 | Z.hi = Htable[nlo].hi; | 438 | Z.hi = Htable[nlo].hi; |
| 418 | Z.lo = Htable[nlo].lo; | 439 | Z.lo = Htable[nlo].lo; |
| 419 | 440 | ||
| 420 | while (1) { | 441 | while (1) { |
| 421 | rem = (size_t)Z.lo&0xf; | 442 | rem = (size_t)Z.lo & 0xf; |
| 422 | Z.lo = (Z.hi<<60)|(Z.lo>>4); | 443 | Z.lo = (Z.hi << 60)|(Z.lo >> 4); |
| 423 | Z.hi = (Z.hi>>4); | 444 | Z.hi = (Z.hi >> 4); |
| 424 | #if SIZE_MAX == 0xffffffffffffffff | 445 | #if SIZE_MAX == 0xffffffffffffffff |
| 425 | Z.hi ^= rem_4bit[rem]; | 446 | Z.hi ^= rem_4bit[rem]; |
| 426 | #else | 447 | #else |
| 427 | Z.hi ^= (u64)rem_4bit[rem]<<32; | 448 | Z.hi ^= (u64)rem_4bit[rem] << 32; |
| 428 | #endif | 449 | #endif |
| 429 | Z.hi ^= Htable[nhi].hi; | 450 | Z.hi ^= Htable[nhi].hi; |
| 430 | Z.lo ^= Htable[nhi].lo; | 451 | Z.lo ^= Htable[nhi].lo; |
| 431 | 452 | ||
| 432 | if (--cnt<0) break; | 453 | if (--cnt < 0) |
| 454 | break; | ||
| 433 | 455 | ||
| 434 | nlo = ((const u8 *)Xi)[cnt]; | 456 | nlo = ((const u8 *)Xi)[cnt]; |
| 435 | nlo ^= inp[cnt]; | 457 | nlo ^= inp[cnt]; |
| 436 | nhi = nlo>>4; | 458 | nhi = nlo >> 4; |
| 437 | nlo &= 0xf; | 459 | nlo &= 0xf; |
| 438 | 460 | ||
| 439 | rem = (size_t)Z.lo&0xf; | 461 | rem = (size_t)Z.lo & 0xf; |
| 440 | Z.lo = (Z.hi<<60)|(Z.lo>>4); | 462 | Z.lo = (Z.hi << 60)|(Z.lo >> 4); |
| 441 | Z.hi = (Z.hi>>4); | 463 | Z.hi = (Z.hi >> 4); |
| 442 | #if SIZE_MAX == 0xffffffffffffffff | 464 | #if SIZE_MAX == 0xffffffffffffffff |
| 443 | Z.hi ^= rem_4bit[rem]; | 465 | Z.hi ^= rem_4bit[rem]; |
| 444 | #else | 466 | #else |
| 445 | Z.hi ^= (u64)rem_4bit[rem]<<32; | 467 | Z.hi ^= (u64)rem_4bit[rem] << 32; |
| 446 | #endif | 468 | #endif |
| 447 | Z.hi ^= Htable[nlo].hi; | 469 | Z.hi ^= Htable[nlo].hi; |
| 448 | Z.lo ^= Htable[nlo].lo; | 470 | Z.lo ^= Htable[nlo].lo; |
| 449 | } | 471 | } |
| 450 | #else | 472 | #else |
| 451 | /* | 473 | /* |
| 452 | * Extra 256+16 bytes per-key plus 512 bytes shared tables | 474 | * Extra 256+16 bytes per-key plus 512 bytes shared tables |
| @@ -454,115 +476,120 @@ static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16], | |||
| 454 | * the rem_8bit even here, but the priority is to minimize | 476 | * the rem_8bit even here, but the priority is to minimize |
| 455 | * cache footprint... | 477 | * cache footprint... |
| 456 | */ | 478 | */ |
| 457 | u128 Hshr4[16]; /* Htable shifted right by 4 bits */ | 479 | u128 Hshr4[16]; /* Htable shifted right by 4 bits */ |
| 458 | u8 Hshl4[16]; /* Htable shifted left by 4 bits */ | 480 | u8 Hshl4[16]; /* Htable shifted left by 4 bits */ |
| 459 | static const unsigned short rem_8bit[256] = { | 481 | static const unsigned short rem_8bit[256] = { |
| 460 | 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, | 482 | 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, |
| 461 | 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, | 483 | 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, |
| 462 | 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, | 484 | 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, |
| 463 | 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, | 485 | 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, |
| 464 | 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, | 486 | 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, |
| 465 | 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, | 487 | 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, |
| 466 | 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, | 488 | 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, |
| 467 | 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, | 489 | 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, |
| 468 | 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, | 490 | 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, |
| 469 | 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, | 491 | 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, |
| 470 | 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, | 492 | 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, |
| 471 | 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, | 493 | 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, |
| 472 | 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, | 494 | 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, |
| 473 | 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, | 495 | 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, |
| 474 | 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, | 496 | 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, |
| 475 | 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, | 497 | 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, |
| 476 | 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, | 498 | 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, |
| 477 | 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, | 499 | 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, |
| 478 | 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, | 500 | 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, |
| 479 | 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, | 501 | 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, |
| 480 | 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, | 502 | 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, |
| 481 | 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, | 503 | 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, |
| 482 | 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, | 504 | 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, |
| 483 | 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, | 505 | 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, |
| 484 | 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, | 506 | 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, |
| 485 | 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, | 507 | 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, |
| 486 | 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE, | 508 | 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE, |
| 487 | 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, | 509 | 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, |
| 488 | 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, | 510 | 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, |
| 489 | 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, | 511 | 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, |
| 490 | 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, | 512 | 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, |
| 491 | 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE }; | 513 | 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE }; |
| 492 | /* | 514 | /* |
| 493 | * This pre-processing phase slows down procedure by approximately | 515 | * This pre-processing phase slows down procedure by approximately |
| 494 | * same time as it makes each loop spin faster. In other words | 516 | * same time as it makes each loop spin faster. In other words |
| 495 | * single block performance is approximately same as straightforward | 517 | * single block performance is approximately same as straightforward |
| 496 | * "4-bit" implementation, and then it goes only faster... | 518 | * "4-bit" implementation, and then it goes only faster... |
| 497 | */ | 519 | */ |
| 498 | for (cnt=0; cnt<16; ++cnt) { | 520 | for (cnt = 0; cnt < 16; ++cnt) { |
| 499 | Z.hi = Htable[cnt].hi; | 521 | Z.hi = Htable[cnt].hi; |
| 500 | Z.lo = Htable[cnt].lo; | 522 | Z.lo = Htable[cnt].lo; |
| 501 | Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4); | 523 | Hshr4[cnt].lo = (Z.hi << 60)|(Z.lo >> 4); |
| 502 | Hshr4[cnt].hi = (Z.hi>>4); | 524 | Hshr4[cnt].hi = (Z.hi >> 4); |
| 503 | Hshl4[cnt] = (u8)(Z.lo<<4); | 525 | Hshl4[cnt] = (u8)(Z.lo << 4); |
| 504 | } | 526 | } |
| 505 | |||
| 506 | do { | ||
| 507 | for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) { | ||
| 508 | nlo = ((const u8 *)Xi)[cnt]; | ||
| 509 | nlo ^= inp[cnt]; | ||
| 510 | nhi = nlo>>4; | ||
| 511 | nlo &= 0xf; | ||
| 512 | 527 | ||
| 513 | Z.hi ^= Htable[nlo].hi; | 528 | do { |
| 514 | Z.lo ^= Htable[nlo].lo; | 529 | for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) { |
| 530 | nlo = ((const u8 *)Xi)[cnt]; | ||
| 531 | nlo ^= inp[cnt]; | ||
| 532 | nhi = nlo >> 4; | ||
| 533 | nlo &= 0xf; | ||
| 515 | 534 | ||
| 516 | rem = (size_t)Z.lo&0xff; | 535 | Z.hi ^= Htable[nlo].hi; |
| 536 | Z.lo ^= Htable[nlo].lo; | ||
| 517 | 537 | ||
| 518 | Z.lo = (Z.hi<<56)|(Z.lo>>8); | 538 | rem = (size_t)Z.lo & 0xff; |
| 519 | Z.hi = (Z.hi>>8); | ||
| 520 | 539 | ||
| 521 | Z.hi ^= Hshr4[nhi].hi; | 540 | Z.lo = (Z.hi << 56)|(Z.lo >> 8); |
| 522 | Z.lo ^= Hshr4[nhi].lo; | 541 | Z.hi = (Z.hi >> 8); |
| 523 | Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48; | ||
| 524 | } | ||
| 525 | 542 | ||
| 526 | nlo = ((const u8 *)Xi)[0]; | 543 | Z.hi ^= Hshr4[nhi].hi; |
| 527 | nlo ^= inp[0]; | 544 | Z.lo ^= Hshr4[nhi].lo; |
| 528 | nhi = nlo>>4; | 545 | Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48; |
| 529 | nlo &= 0xf; | 546 | } |
| 530 | 547 | ||
| 531 | Z.hi ^= Htable[nlo].hi; | 548 | nlo = ((const u8 *)Xi)[0]; |
| 532 | Z.lo ^= Htable[nlo].lo; | 549 | nlo ^= inp[0]; |
| 550 | nhi = nlo >> 4; | ||
| 551 | nlo &= 0xf; | ||
| 552 | |||
| 553 | Z.hi ^= Htable[nlo].hi; | ||
| 554 | Z.lo ^= Htable[nlo].lo; | ||
| 533 | 555 | ||
| 534 | rem = (size_t)Z.lo&0xf; | 556 | rem = (size_t)Z.lo & 0xf; |
| 535 | 557 | ||
| 536 | Z.lo = (Z.hi<<60)|(Z.lo>>4); | 558 | Z.lo = (Z.hi << 60)|(Z.lo >> 4); |
| 537 | Z.hi = (Z.hi>>4); | 559 | Z.hi = (Z.hi >> 4); |
| 538 | 560 | ||
| 539 | Z.hi ^= Htable[nhi].hi; | 561 | Z.hi ^= Htable[nhi].hi; |
| 540 | Z.lo ^= Htable[nhi].lo; | 562 | Z.lo ^= Htable[nhi].lo; |
| 541 | Z.hi ^= ((u64)rem_8bit[rem<<4])<<48; | 563 | Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48; |
| 542 | #endif | 564 | #endif |
| 543 | 565 | ||
| 544 | #if BYTE_ORDER == LITTLE_ENDIAN | 566 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 545 | #ifdef BSWAP8 | 567 | #ifdef BSWAP8 |
| 546 | Xi[0] = BSWAP8(Z.hi); | 568 | Xi[0] = BSWAP8(Z.hi); |
| 547 | Xi[1] = BSWAP8(Z.lo); | 569 | Xi[1] = BSWAP8(Z.lo); |
| 548 | #else | 570 | #else |
| 549 | u8 *p = (u8 *)Xi; | 571 | u8 *p = (u8 *)Xi; |
| 550 | u32 v; | 572 | u32 v; |
| 551 | v = (u32)(Z.hi>>32); PUTU32(p,v); | 573 | v = (u32)(Z.hi >> 32); |
| 552 | v = (u32)(Z.hi); PUTU32(p+4,v); | 574 | PUTU32(p, v); |
| 553 | v = (u32)(Z.lo>>32); PUTU32(p+8,v); | 575 | v = (u32)(Z.hi); |
| 554 | v = (u32)(Z.lo); PUTU32(p+12,v); | 576 | PUTU32(p + 4, v); |
| 577 | v = (u32)(Z.lo >> 32); | ||
| 578 | PUTU32(p + 8, v); | ||
| 579 | v = (u32)(Z.lo); | ||
| 580 | PUTU32(p + 12, v); | ||
| 555 | #endif | 581 | #endif |
| 556 | #else /* BIG_ENDIAN */ | 582 | #else /* BIG_ENDIAN */ |
| 557 | Xi[0] = Z.hi; | 583 | Xi[0] = Z.hi; |
| 558 | Xi[1] = Z.lo; | 584 | Xi[1] = Z.lo; |
| 559 | #endif | 585 | #endif |
| 560 | } while (inp+=16, len-=16); | 586 | } while (inp += 16, len -= 16); |
| 561 | } | 587 | } |
| 562 | #endif | 588 | #endif |
| 563 | #else | 589 | #else |
| 564 | void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]); | 590 | void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]); |
| 565 | void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); | 591 | void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, |
| 592 | size_t len); | ||
| 566 | #endif | 593 | #endif |
| 567 | 594 | ||
| 568 | #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) | 595 | #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) |
| @@ -576,37 +603,38 @@ void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); | |||
| 576 | 603 | ||
| 577 | #else /* TABLE_BITS */ | 604 | #else /* TABLE_BITS */ |
| 578 | 605 | ||
| 579 | static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) | 606 | static void |
| 607 | gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) | ||
| 580 | { | 608 | { |
| 581 | u128 V,Z = { 0,0 }; | 609 | u128 V, Z = { 0,0 }; |
| 582 | long X; | 610 | long X; |
| 583 | int i,j; | 611 | int i, j; |
| 584 | const long *xi = (const long *)Xi; | 612 | const long *xi = (const long *)Xi; |
| 585 | 613 | ||
| 586 | V.hi = H[0]; /* H is in host byte order, no byte swapping */ | 614 | V.hi = H[0]; /* H is in host byte order, no byte swapping */ |
| 587 | V.lo = H[1]; | 615 | V.lo = H[1]; |
| 588 | 616 | ||
| 589 | for (j=0; j<16/sizeof(long); ++j) { | 617 | for (j = 0; j < 16/sizeof(long); ++j) { |
| 590 | #if BYTE_ORDER == LITTLE_ENDIAN | 618 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 591 | #if SIZE_MAX == 0xffffffffffffffff | 619 | #if SIZE_MAX == 0xffffffffffffffff |
| 592 | #ifdef BSWAP8 | 620 | #ifdef BSWAP8 |
| 593 | X = (long)(BSWAP8(xi[j])); | 621 | X = (long)(BSWAP8(xi[j])); |
| 594 | #else | 622 | #else |
| 595 | const u8 *p = (const u8 *)(xi+j); | 623 | const u8 *p = (const u8 *)(xi + j); |
| 596 | X = (long)((u64)GETU32(p)<<32|GETU32(p+4)); | 624 | X = (long)((u64)GETU32(p) << 32|GETU32(p + 4)); |
| 597 | #endif | 625 | #endif |
| 598 | #else | 626 | #else |
| 599 | const u8 *p = (const u8 *)(xi+j); | 627 | const u8 *p = (const u8 *)(xi + j); |
| 600 | X = (long)GETU32(p); | 628 | X = (long)GETU32(p); |
| 601 | #endif | 629 | #endif |
| 602 | #else /* BIG_ENDIAN */ | 630 | #else /* BIG_ENDIAN */ |
| 603 | X = xi[j]; | 631 | X = xi[j]; |
| 604 | #endif | 632 | #endif |
| 605 | 633 | ||
| 606 | for (i=0; i<8*sizeof(long); ++i, X<<=1) { | 634 | for (i = 0; i < 8*sizeof(long); ++i, X <<= 1) { |
| 607 | u64 M = (u64)(X>>(8*sizeof(long)-1)); | 635 | u64 M = (u64)(X >> (8*sizeof(long) - 1)); |
| 608 | Z.hi ^= V.hi&M; | 636 | Z.hi ^= V.hi & M; |
| 609 | Z.lo ^= V.lo&M; | 637 | Z.lo ^= V.lo & M; |
| 610 | 638 | ||
| 611 | REDUCE1BIT(V); | 639 | REDUCE1BIT(V); |
| 612 | } | 640 | } |
| @@ -619,10 +647,14 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) | |||
| 619 | #else | 647 | #else |
| 620 | u8 *p = (u8 *)Xi; | 648 | u8 *p = (u8 *)Xi; |
| 621 | u32 v; | 649 | u32 v; |
| 622 | v = (u32)(Z.hi>>32); PUTU32(p,v); | 650 | v = (u32)(Z.hi >> 32); |
| 623 | v = (u32)(Z.hi); PUTU32(p+4,v); | 651 | PUTU32(p, v); |
| 624 | v = (u32)(Z.lo>>32); PUTU32(p+8,v); | 652 | v = (u32)(Z.hi); |
| 625 | v = (u32)(Z.lo); PUTU32(p+12,v); | 653 | PUTU32(p + 4, v); |
| 654 | v = (u32)(Z.lo >> 32); | ||
| 655 | PUTU32(p + 8, v); | ||
| 656 | v = (u32)(Z.lo); | ||
| 657 | PUTU32(p + 12, v); | ||
| 626 | #endif | 658 | #endif |
| 627 | #else /* BIG_ENDIAN */ | 659 | #else /* BIG_ENDIAN */ |
| 628 | Xi[0] = Z.hi; | 660 | Xi[0] = Z.hi; |
| @@ -633,39 +665,43 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) | |||
| 633 | 665 | ||
| 634 | #endif | 666 | #endif |
| 635 | 667 | ||
| 636 | #if defined(GHASH_ASM) && \ | 668 | #if defined(GHASH_ASM) && \ |
| 637 | (defined(__i386) || defined(__i386__) || \ | 669 | (defined(__i386) || defined(__i386__) || \ |
| 638 | defined(__x86_64) || defined(__x86_64__) || \ | 670 | defined(__x86_64) || defined(__x86_64__) || \ |
| 639 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) | 671 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) |
| 640 | #include "x86_arch.h" | 672 | #include "x86_arch.h" |
| 641 | #endif | 673 | #endif |
| 642 | 674 | ||
| 643 | #if TABLE_BITS==4 && defined(GHASH_ASM) | 675 | #if TABLE_BITS==4 && defined(GHASH_ASM) |
| 644 | # if (defined(__i386) || defined(__i386__) || \ | 676 | # if (defined(__i386) || defined(__i386__) || \ |
| 645 | defined(__x86_64) || defined(__x86_64__) || \ | 677 | defined(__x86_64) || defined(__x86_64__) || \ |
| 646 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) | 678 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) |
| 647 | # define GHASH_ASM_X86_OR_64 | 679 | # define GHASH_ASM_X86_OR_64 |
| 648 | # define GCM_FUNCREF_4BIT | 680 | # define GCM_FUNCREF_4BIT |
| 649 | 681 | ||
| 650 | void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]); | 682 | void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]); |
| 651 | void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]); | 683 | void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); |
| 652 | void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); | 684 | void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, |
| 685 | size_t len); | ||
| 653 | 686 | ||
| 654 | # if defined(__i386) || defined(__i386__) || defined(_M_IX86) | 687 | # if defined(__i386) || defined(__i386__) || defined(_M_IX86) |
| 655 | # define GHASH_ASM_X86 | 688 | # define GHASH_ASM_X86 |
| 656 | void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]); | 689 | void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]); |
| 657 | void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); | 690 | void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, |
| 691 | size_t len); | ||
| 658 | 692 | ||
| 659 | void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]); | 693 | void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); |
| 660 | void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); | 694 | void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, |
| 695 | size_t len); | ||
| 661 | # endif | 696 | # endif |
| 662 | # elif defined(__arm__) || defined(__arm) | 697 | # elif defined(__arm__) || defined(__arm) |
| 663 | # include "arm_arch.h" | 698 | # include "arm_arch.h" |
| 664 | # if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) | 699 | # if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) |
| 665 | # define GHASH_ASM_ARM | 700 | # define GHASH_ASM_ARM |
| 666 | # define GCM_FUNCREF_4BIT | 701 | # define GCM_FUNCREF_4BIT |
| 667 | void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); | 702 | void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]); |
| 668 | void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); | 703 | void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, |
| 704 | size_t len); | ||
| 669 | # endif | 705 | # endif |
| 670 | # endif | 706 | # endif |
| 671 | #endif | 707 | #endif |
| @@ -679,13 +715,14 @@ void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); | |||
| 679 | # endif | 715 | # endif |
| 680 | #endif | 716 | #endif |
| 681 | 717 | ||
| 682 | void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) | 718 | void |
| 719 | CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) | ||
| 683 | { | 720 | { |
| 684 | memset(ctx,0,sizeof(*ctx)); | 721 | memset(ctx, 0, sizeof(*ctx)); |
| 685 | ctx->block = block; | 722 | ctx->block = block; |
| 686 | ctx->key = key; | 723 | ctx->key = key; |
| 687 | 724 | ||
| 688 | (*block)(ctx->H.c,ctx->H.c,key); | 725 | (*block)(ctx->H.c, ctx->H.c, key); |
| 689 | 726 | ||
| 690 | #if BYTE_ORDER == LITTLE_ENDIAN | 727 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 691 | /* H is stored in host byte order */ | 728 | /* H is stored in host byte order */ |
| @@ -694,29 +731,29 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) | |||
| 694 | ctx->H.u[1] = BSWAP8(ctx->H.u[1]); | 731 | ctx->H.u[1] = BSWAP8(ctx->H.u[1]); |
| 695 | #else | 732 | #else |
| 696 | u8 *p = ctx->H.c; | 733 | u8 *p = ctx->H.c; |
| 697 | u64 hi,lo; | 734 | u64 hi, lo; |
| 698 | hi = (u64)GETU32(p) <<32|GETU32(p+4); | 735 | hi = (u64)GETU32(p) << 32|GETU32(p + 4); |
| 699 | lo = (u64)GETU32(p+8)<<32|GETU32(p+12); | 736 | lo = (u64)GETU32(p + 8) << 32|GETU32(p + 12); |
| 700 | ctx->H.u[0] = hi; | 737 | ctx->H.u[0] = hi; |
| 701 | ctx->H.u[1] = lo; | 738 | ctx->H.u[1] = lo; |
| 702 | #endif | 739 | #endif |
| 703 | #endif | 740 | #endif |
| 704 | 741 | ||
| 705 | #if TABLE_BITS==8 | 742 | #if TABLE_BITS==8 |
| 706 | gcm_init_8bit(ctx->Htable,ctx->H.u); | 743 | gcm_init_8bit(ctx->Htable, ctx->H.u); |
| 707 | #elif TABLE_BITS==4 | 744 | #elif TABLE_BITS==4 |
| 708 | # if defined(GHASH_ASM_X86_OR_64) | 745 | # if defined(GHASH_ASM_X86_OR_64) |
| 709 | # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) | 746 | # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) |
| 710 | /* check FXSR and PCLMULQDQ bits */ | 747 | /* check FXSR and PCLMULQDQ bits */ |
| 711 | if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) == | 748 | if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) == |
| 712 | (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) { | 749 | (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) { |
| 713 | gcm_init_clmul(ctx->Htable,ctx->H.u); | 750 | gcm_init_clmul(ctx->Htable, ctx->H.u); |
| 714 | ctx->gmult = gcm_gmult_clmul; | 751 | ctx->gmult = gcm_gmult_clmul; |
| 715 | ctx->ghash = gcm_ghash_clmul; | 752 | ctx->ghash = gcm_ghash_clmul; |
| 716 | return; | 753 | return; |
| 717 | } | 754 | } |
| 718 | # endif | 755 | # endif |
| 719 | gcm_init_4bit(ctx->Htable,ctx->H.u); | 756 | gcm_init_4bit(ctx->Htable, ctx->H.u); |
| 720 | # if defined(GHASH_ASM_X86) /* x86 only */ | 757 | # if defined(GHASH_ASM_X86) /* x86 only */ |
| 721 | # if defined(OPENSSL_IA32_SSE2) | 758 | # if defined(OPENSSL_IA32_SSE2) |
| 722 | if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */ | 759 | if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */ |
| @@ -738,112 +775,116 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) | |||
| 738 | ctx->gmult = gcm_gmult_neon; | 775 | ctx->gmult = gcm_gmult_neon; |
| 739 | ctx->ghash = gcm_ghash_neon; | 776 | ctx->ghash = gcm_ghash_neon; |
| 740 | } else { | 777 | } else { |
| 741 | gcm_init_4bit(ctx->Htable,ctx->H.u); | 778 | gcm_init_4bit(ctx->Htable, ctx->H.u); |
| 742 | ctx->gmult = gcm_gmult_4bit; | 779 | ctx->gmult = gcm_gmult_4bit; |
| 743 | ctx->ghash = gcm_ghash_4bit; | 780 | ctx->ghash = gcm_ghash_4bit; |
| 744 | } | 781 | } |
| 745 | # else | 782 | # else |
| 746 | gcm_init_4bit(ctx->Htable,ctx->H.u); | 783 | gcm_init_4bit(ctx->Htable, ctx->H.u); |
| 747 | # endif | 784 | # endif |
| 748 | #endif | 785 | #endif |
| 749 | } | 786 | } |
| 750 | 787 | ||
| 751 | void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len) | 788 | void |
| 789 | CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len) | ||
| 752 | { | 790 | { |
| 753 | unsigned int ctr; | 791 | unsigned int ctr; |
| 754 | #ifdef GCM_FUNCREF_4BIT | 792 | #ifdef GCM_FUNCREF_4BIT |
| 755 | void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; | 793 | void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult; |
| 756 | #endif | 794 | #endif |
| 757 | 795 | ||
| 758 | ctx->Yi.u[0] = 0; | 796 | ctx->Yi.u[0] = 0; |
| 759 | ctx->Yi.u[1] = 0; | 797 | ctx->Yi.u[1] = 0; |
| 760 | ctx->Xi.u[0] = 0; | 798 | ctx->Xi.u[0] = 0; |
| 761 | ctx->Xi.u[1] = 0; | 799 | ctx->Xi.u[1] = 0; |
| 762 | ctx->len.u[0] = 0; /* AAD length */ | 800 | ctx->len.u[0] = 0; /* AAD length */ |
| 763 | ctx->len.u[1] = 0; /* message length */ | 801 | ctx->len.u[1] = 0; /* message length */ |
| 764 | ctx->ares = 0; | 802 | ctx->ares = 0; |
| 765 | ctx->mres = 0; | 803 | ctx->mres = 0; |
| 766 | 804 | ||
| 767 | if (len==12) { | 805 | if (len == 12) { |
| 768 | memcpy(ctx->Yi.c,iv,12); | 806 | memcpy(ctx->Yi.c, iv, 12); |
| 769 | ctx->Yi.c[15]=1; | 807 | ctx->Yi.c[15] = 1; |
| 770 | ctr=1; | 808 | ctr = 1; |
| 771 | } | 809 | } else { |
| 772 | else { | ||
| 773 | size_t i; | 810 | size_t i; |
| 774 | u64 len0 = len; | 811 | u64 len0 = len; |
| 775 | 812 | ||
| 776 | while (len>=16) { | 813 | while (len >= 16) { |
| 777 | for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i]; | 814 | for (i = 0; i < 16; ++i) |
| 778 | GCM_MUL(ctx,Yi); | 815 | ctx->Yi.c[i] ^= iv[i]; |
| 816 | GCM_MUL(ctx, Yi); | ||
| 779 | iv += 16; | 817 | iv += 16; |
| 780 | len -= 16; | 818 | len -= 16; |
| 781 | } | 819 | } |
| 782 | if (len) { | 820 | if (len) { |
| 783 | for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i]; | 821 | for (i = 0; i < len; ++i) |
| 784 | GCM_MUL(ctx,Yi); | 822 | ctx->Yi.c[i] ^= iv[i]; |
| 823 | GCM_MUL(ctx, Yi); | ||
| 785 | } | 824 | } |
| 786 | len0 <<= 3; | 825 | len0 <<= 3; |
| 787 | #if BYTE_ORDER == LITTLE_ENDIAN | 826 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 788 | #ifdef BSWAP8 | 827 | #ifdef BSWAP8 |
| 789 | ctx->Yi.u[1] ^= BSWAP8(len0); | 828 | ctx->Yi.u[1] ^= BSWAP8(len0); |
| 790 | #else | 829 | #else |
| 791 | ctx->Yi.c[8] ^= (u8)(len0>>56); | 830 | ctx->Yi.c[8] ^= (u8)(len0 >> 56); |
| 792 | ctx->Yi.c[9] ^= (u8)(len0>>48); | 831 | ctx->Yi.c[9] ^= (u8)(len0 >> 48); |
| 793 | ctx->Yi.c[10] ^= (u8)(len0>>40); | 832 | ctx->Yi.c[10] ^= (u8)(len0 >> 40); |
| 794 | ctx->Yi.c[11] ^= (u8)(len0>>32); | 833 | ctx->Yi.c[11] ^= (u8)(len0 >> 32); |
| 795 | ctx->Yi.c[12] ^= (u8)(len0>>24); | 834 | ctx->Yi.c[12] ^= (u8)(len0 >> 24); |
| 796 | ctx->Yi.c[13] ^= (u8)(len0>>16); | 835 | ctx->Yi.c[13] ^= (u8)(len0 >> 16); |
| 797 | ctx->Yi.c[14] ^= (u8)(len0>>8); | 836 | ctx->Yi.c[14] ^= (u8)(len0 >> 8); |
| 798 | ctx->Yi.c[15] ^= (u8)(len0); | 837 | ctx->Yi.c[15] ^= (u8)(len0); |
| 799 | #endif | 838 | #endif |
| 800 | #else /* BIG_ENDIAN */ | 839 | #else /* BIG_ENDIAN */ |
| 801 | ctx->Yi.u[1] ^= len0; | 840 | ctx->Yi.u[1] ^= len0; |
| 802 | #endif | 841 | #endif |
| 803 | 842 | ||
| 804 | GCM_MUL(ctx,Yi); | 843 | GCM_MUL(ctx, Yi); |
| 805 | 844 | ||
| 806 | #if BYTE_ORDER == LITTLE_ENDIAN | 845 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 807 | #ifdef BSWAP4 | 846 | #ifdef BSWAP4 |
| 808 | ctr = BSWAP4(ctx->Yi.d[3]); | 847 | ctr = BSWAP4(ctx->Yi.d[3]); |
| 809 | #else | 848 | #else |
| 810 | ctr = GETU32(ctx->Yi.c+12); | 849 | ctr = GETU32(ctx->Yi.c + 12); |
| 811 | #endif | 850 | #endif |
| 812 | #else /* BIG_ENDIAN */ | 851 | #else /* BIG_ENDIAN */ |
| 813 | ctr = ctx->Yi.d[3]; | 852 | ctr = ctx->Yi.d[3]; |
| 814 | #endif | 853 | #endif |
| 815 | } | 854 | } |
| 816 | 855 | ||
| 817 | (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key); | 856 | (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key); |
| 818 | ++ctr; | 857 | ++ctr; |
| 819 | #if BYTE_ORDER == LITTLE_ENDIAN | 858 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 820 | #ifdef BSWAP4 | 859 | #ifdef BSWAP4 |
| 821 | ctx->Yi.d[3] = BSWAP4(ctr); | 860 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 822 | #else | 861 | #else |
| 823 | PUTU32(ctx->Yi.c+12,ctr); | 862 | PUTU32(ctx->Yi.c + 12, ctr); |
| 824 | #endif | 863 | #endif |
| 825 | #else /* BIG_ENDIAN */ | 864 | #else /* BIG_ENDIAN */ |
| 826 | ctx->Yi.d[3] = ctr; | 865 | ctx->Yi.d[3] = ctr; |
| 827 | #endif | 866 | #endif |
| 828 | } | 867 | } |
| 829 | 868 | ||
| 830 | int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len) | 869 | int |
| 870 | CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, size_t len) | ||
| 831 | { | 871 | { |
| 832 | size_t i; | 872 | size_t i; |
| 833 | unsigned int n; | 873 | unsigned int n; |
| 834 | u64 alen = ctx->len.u[0]; | 874 | u64 alen = ctx->len.u[0]; |
| 835 | #ifdef GCM_FUNCREF_4BIT | 875 | #ifdef GCM_FUNCREF_4BIT |
| 836 | void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; | 876 | void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult; |
| 837 | # ifdef GHASH | 877 | # ifdef GHASH |
| 838 | void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], | 878 | void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16], |
| 839 | const u8 *inp,size_t len) = ctx->ghash; | 879 | const u8 *inp, size_t len) = ctx->ghash; |
| 840 | # endif | 880 | # endif |
| 841 | #endif | 881 | #endif |
| 842 | 882 | ||
| 843 | if (ctx->len.u[1]) return -2; | 883 | if (ctx->len.u[1]) |
| 884 | return -2; | ||
| 844 | 885 | ||
| 845 | alen += len; | 886 | alen += len; |
| 846 | if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len)) | 887 | if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len)) |
| 847 | return -1; | 888 | return -1; |
| 848 | ctx->len.u[0] = alen; | 889 | ctx->len.u[0] = alen; |
| 849 | 890 | ||
| @@ -852,9 +893,10 @@ int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len) | |||
| 852 | while (n && len) { | 893 | while (n && len) { |
| 853 | ctx->Xi.c[n] ^= *(aad++); | 894 | ctx->Xi.c[n] ^= *(aad++); |
| 854 | --len; | 895 | --len; |
| 855 | n = (n+1)%16; | 896 | n = (n + 1) % 16; |
| 856 | } | 897 | } |
| 857 | if (n==0) GCM_MUL(ctx,Xi); | 898 | if (n == 0) |
| 899 | GCM_MUL(ctx, Xi); | ||
| 858 | else { | 900 | else { |
| 859 | ctx->ares = n; | 901 | ctx->ares = n; |
| 860 | return 0; | 902 | return 0; |
| @@ -862,53 +904,56 @@ int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len) | |||
| 862 | } | 904 | } |
| 863 | 905 | ||
| 864 | #ifdef GHASH | 906 | #ifdef GHASH |
| 865 | if ((i = (len&(size_t)-16))) { | 907 | if ((i = (len & (size_t)-16))) { |
| 866 | GHASH(ctx,aad,i); | 908 | GHASH(ctx, aad, i); |
| 867 | aad += i; | 909 | aad += i; |
| 868 | len -= i; | 910 | len -= i; |
| 869 | } | 911 | } |
| 870 | #else | 912 | #else |
| 871 | while (len>=16) { | 913 | while (len >= 16) { |
| 872 | for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i]; | 914 | for (i = 0; i < 16; ++i) |
| 873 | GCM_MUL(ctx,Xi); | 915 | ctx->Xi.c[i] ^= aad[i]; |
| 916 | GCM_MUL(ctx, Xi); | ||
| 874 | aad += 16; | 917 | aad += 16; |
| 875 | len -= 16; | 918 | len -= 16; |
| 876 | } | 919 | } |
| 877 | #endif | 920 | #endif |
| 878 | if (len) { | 921 | if (len) { |
| 879 | n = (unsigned int)len; | 922 | n = (unsigned int)len; |
| 880 | for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i]; | 923 | for (i = 0; i < len; ++i) |
| 924 | ctx->Xi.c[i] ^= aad[i]; | ||
| 881 | } | 925 | } |
| 882 | 926 | ||
| 883 | ctx->ares = n; | 927 | ctx->ares = n; |
| 884 | return 0; | 928 | return 0; |
| 885 | } | 929 | } |
| 886 | 930 | ||
| 887 | int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, | 931 | int |
| 888 | const unsigned char *in, unsigned char *out, | 932 | CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, |
| 889 | size_t len) | 933 | const unsigned char *in, unsigned char *out, |
| 934 | size_t len) | ||
| 890 | { | 935 | { |
| 891 | unsigned int n, ctr; | 936 | unsigned int n, ctr; |
| 892 | size_t i; | 937 | size_t i; |
| 893 | u64 mlen = ctx->len.u[1]; | 938 | u64 mlen = ctx->len.u[1]; |
| 894 | block128_f block = ctx->block; | 939 | block128_f block = ctx->block; |
| 895 | void *key = ctx->key; | 940 | void *key = ctx->key; |
| 896 | #ifdef GCM_FUNCREF_4BIT | 941 | #ifdef GCM_FUNCREF_4BIT |
| 897 | void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; | 942 | void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult; |
| 898 | # ifdef GHASH | 943 | # ifdef GHASH |
| 899 | void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], | 944 | void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16], |
| 900 | const u8 *inp,size_t len) = ctx->ghash; | 945 | const u8 *inp, size_t len) = ctx->ghash; |
| 901 | # endif | 946 | # endif |
| 902 | #endif | 947 | #endif |
| 903 | 948 | ||
| 904 | mlen += len; | 949 | mlen += len; |
| 905 | if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len)) | 950 | if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) |
| 906 | return -1; | 951 | return -1; |
| 907 | ctx->len.u[1] = mlen; | 952 | ctx->len.u[1] = mlen; |
| 908 | 953 | ||
| 909 | if (ctx->ares) { | 954 | if (ctx->ares) { |
| 910 | /* First call to encrypt finalizes GHASH(AAD) */ | 955 | /* First call to encrypt finalizes GHASH(AAD) */ |
| 911 | GCM_MUL(ctx,Xi); | 956 | GCM_MUL(ctx, Xi); |
| 912 | ctx->ares = 0; | 957 | ctx->ares = 0; |
| 913 | } | 958 | } |
| 914 | 959 | ||
| @@ -916,7 +961,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, | |||
| 916 | #ifdef BSWAP4 | 961 | #ifdef BSWAP4 |
| 917 | ctr = BSWAP4(ctx->Yi.d[3]); | 962 | ctr = BSWAP4(ctx->Yi.d[3]); |
| 918 | #else | 963 | #else |
| 919 | ctr = GETU32(ctx->Yi.c+12); | 964 | ctr = GETU32(ctx->Yi.c + 12); |
| 920 | #endif | 965 | #endif |
| 921 | #else /* BIG_ENDIAN */ | 966 | #else /* BIG_ENDIAN */ |
| 922 | ctr = ctx->Yi.d[3]; | 967 | ctr = ctx->Yi.d[3]; |
| @@ -924,173 +969,180 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, | |||
| 924 | 969 | ||
| 925 | n = ctx->mres; | 970 | n = ctx->mres; |
| 926 | #if !defined(OPENSSL_SMALL_FOOTPRINT) | 971 | #if !defined(OPENSSL_SMALL_FOOTPRINT) |
| 927 | if (16%sizeof(size_t) == 0) do { /* always true actually */ | 972 | if (16 % sizeof(size_t) == 0) |
| 928 | if (n) { | 973 | do { /* always true actually */ |
| 929 | while (n && len) { | 974 | if (n) { |
| 930 | ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n]; | 975 | while (n && len) { |
| 931 | --len; | 976 | ctx->Xi.c[n] ^= *(out++) = *(in++) ^ |
| 932 | n = (n+1)%16; | 977 | ctx->EKi.c[n]; |
| 978 | --len; | ||
| 979 | n = (n + 1) % 16; | ||
| 980 | } | ||
| 981 | if (n == 0) | ||
| 982 | GCM_MUL(ctx, Xi); | ||
| 983 | else { | ||
| 984 | ctx->mres = n; | ||
| 985 | return 0; | ||
| 986 | } | ||
| 933 | } | 987 | } |
| 934 | if (n==0) GCM_MUL(ctx,Xi); | ||
| 935 | else { | ||
| 936 | ctx->mres = n; | ||
| 937 | return 0; | ||
| 938 | } | ||
| 939 | } | ||
| 940 | #ifdef __STRICT_ALIGNMENT | 988 | #ifdef __STRICT_ALIGNMENT |
| 941 | if (((size_t)in|(size_t)out)%sizeof(size_t) != 0) | 989 | if (((size_t)in|(size_t)out) % sizeof(size_t) != 0) |
| 942 | break; | 990 | break; |
| 943 | #endif | 991 | #endif |
| 944 | #if defined(GHASH) && defined(GHASH_CHUNK) | 992 | #if defined(GHASH) && defined(GHASH_CHUNK) |
| 945 | while (len>=GHASH_CHUNK) { | 993 | while (len >= GHASH_CHUNK) { |
| 946 | size_t j=GHASH_CHUNK; | 994 | size_t j = GHASH_CHUNK; |
| 947 | 995 | ||
| 948 | while (j) { | 996 | while (j) { |
| 949 | size_t *out_t=(size_t *)out; | 997 | size_t *out_t = (size_t *)out; |
| 950 | const size_t *in_t=(const size_t *)in; | 998 | const size_t *in_t = (const size_t *)in; |
| 951 | 999 | ||
| 952 | (*block)(ctx->Yi.c,ctx->EKi.c,key); | 1000 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
| 953 | ++ctr; | 1001 | ++ctr; |
| 954 | #if BYTE_ORDER == LITTLE_ENDIAN | 1002 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 955 | #ifdef BSWAP4 | 1003 | #ifdef BSWAP4 |
| 956 | ctx->Yi.d[3] = BSWAP4(ctr); | 1004 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 957 | #else | 1005 | #else |
| 958 | PUTU32(ctx->Yi.c+12,ctr); | 1006 | PUTU32(ctx->Yi.c + 12, ctr); |
| 959 | #endif | 1007 | #endif |
| 960 | #else /* BIG_ENDIAN */ | 1008 | #else /* BIG_ENDIAN */ |
| 961 | ctx->Yi.d[3] = ctr; | 1009 | ctx->Yi.d[3] = ctr; |
| 962 | #endif | 1010 | #endif |
| 963 | for (i=0; i<16/sizeof(size_t); ++i) | 1011 | for (i = 0; i < 16/sizeof(size_t); ++i) |
| 964 | out_t[i] = in_t[i] ^ ctx->EKi.t[i]; | 1012 | out_t[i] = in_t[i] ^ |
| 965 | out += 16; | 1013 | ctx->EKi.t[i]; |
| 966 | in += 16; | 1014 | out += 16; |
| 967 | j -= 16; | 1015 | in += 16; |
| 968 | } | 1016 | j -= 16; |
| 969 | GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK); | 1017 | } |
| 970 | len -= GHASH_CHUNK; | 1018 | GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK); |
| 971 | } | 1019 | len -= GHASH_CHUNK; |
| 972 | if ((i = (len&(size_t)-16))) { | 1020 | } |
| 973 | size_t j=i; | 1021 | if ((i = (len & (size_t)-16))) { |
| 1022 | size_t j = i; | ||
| 974 | 1023 | ||
| 975 | while (len>=16) { | 1024 | while (len >= 16) { |
| 976 | size_t *out_t=(size_t *)out; | 1025 | size_t *out_t = (size_t *)out; |
| 977 | const size_t *in_t=(const size_t *)in; | 1026 | const size_t *in_t = (const size_t *)in; |
| 978 | 1027 | ||
| 979 | (*block)(ctx->Yi.c,ctx->EKi.c,key); | 1028 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
| 980 | ++ctr; | 1029 | ++ctr; |
| 981 | #if BYTE_ORDER == LITTLE_ENDIAN | 1030 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 982 | #ifdef BSWAP4 | 1031 | #ifdef BSWAP4 |
| 983 | ctx->Yi.d[3] = BSWAP4(ctr); | 1032 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 984 | #else | 1033 | #else |
| 985 | PUTU32(ctx->Yi.c+12,ctr); | 1034 | PUTU32(ctx->Yi.c + 12, ctr); |
| 986 | #endif | 1035 | #endif |
| 987 | #else /* BIG_ENDIAN */ | 1036 | #else /* BIG_ENDIAN */ |
| 988 | ctx->Yi.d[3] = ctr; | 1037 | ctx->Yi.d[3] = ctr; |
| 989 | #endif | 1038 | #endif |
| 990 | for (i=0; i<16/sizeof(size_t); ++i) | 1039 | for (i = 0; i < 16/sizeof(size_t); ++i) |
| 991 | out_t[i] = in_t[i] ^ ctx->EKi.t[i]; | 1040 | out_t[i] = in_t[i] ^ |
| 992 | out += 16; | 1041 | ctx->EKi.t[i]; |
| 993 | in += 16; | 1042 | out += 16; |
| 994 | len -= 16; | 1043 | in += 16; |
| 995 | } | 1044 | len -= 16; |
| 996 | GHASH(ctx,out-j,j); | 1045 | } |
| 997 | } | 1046 | GHASH(ctx, out - j, j); |
| 1047 | } | ||
| 998 | #else | 1048 | #else |
| 999 | while (len>=16) { | 1049 | while (len >= 16) { |
| 1000 | size_t *out_t=(size_t *)out; | 1050 | size_t *out_t = (size_t *)out; |
| 1001 | const size_t *in_t=(const size_t *)in; | 1051 | const size_t *in_t = (const size_t *)in; |
| 1002 | 1052 | ||
| 1003 | (*block)(ctx->Yi.c,ctx->EKi.c,key); | 1053 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
| 1004 | ++ctr; | 1054 | ++ctr; |
| 1005 | #if BYTE_ORDER == LITTLE_ENDIAN | 1055 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1006 | #ifdef BSWAP4 | 1056 | #ifdef BSWAP4 |
| 1007 | ctx->Yi.d[3] = BSWAP4(ctr); | 1057 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1008 | #else | 1058 | #else |
| 1009 | PUTU32(ctx->Yi.c+12,ctr); | 1059 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1010 | #endif | 1060 | #endif |
| 1011 | #else /* BIG_ENDIAN */ | 1061 | #else /* BIG_ENDIAN */ |
| 1012 | ctx->Yi.d[3] = ctr; | 1062 | ctx->Yi.d[3] = ctr; |
| 1013 | #endif | 1063 | #endif |
| 1014 | for (i=0; i<16/sizeof(size_t); ++i) | 1064 | for (i = 0; i < 16/sizeof(size_t); ++i) |
| 1015 | ctx->Xi.t[i] ^= | 1065 | ctx->Xi.t[i] ^= |
| 1016 | out_t[i] = in_t[i]^ctx->EKi.t[i]; | 1066 | out_t[i] = in_t[i] ^ ctx->EKi.t[i]; |
| 1017 | GCM_MUL(ctx,Xi); | 1067 | GCM_MUL(ctx, Xi); |
| 1018 | out += 16; | 1068 | out += 16; |
| 1019 | in += 16; | 1069 | in += 16; |
| 1020 | len -= 16; | 1070 | len -= 16; |
| 1021 | } | 1071 | } |
| 1022 | #endif | 1072 | #endif |
| 1023 | if (len) { | 1073 | if (len) { |
| 1024 | (*block)(ctx->Yi.c,ctx->EKi.c,key); | 1074 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
| 1025 | ++ctr; | 1075 | ++ctr; |
| 1026 | #if BYTE_ORDER == LITTLE_ENDIAN | 1076 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1027 | #ifdef BSWAP4 | 1077 | #ifdef BSWAP4 |
| 1028 | ctx->Yi.d[3] = BSWAP4(ctr); | 1078 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1029 | #else | 1079 | #else |
| 1030 | PUTU32(ctx->Yi.c+12,ctr); | 1080 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1031 | #endif | 1081 | #endif |
| 1032 | #else /* BIG_ENDIAN */ | 1082 | #else /* BIG_ENDIAN */ |
| 1033 | ctx->Yi.d[3] = ctr; | 1083 | ctx->Yi.d[3] = ctr; |
| 1034 | #endif | 1084 | #endif |
| 1035 | while (len--) { | 1085 | while (len--) { |
| 1036 | ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n]; | 1086 | ctx->Xi.c[n] ^= out[n] = in[n] ^ |
| 1037 | ++n; | 1087 | ctx->EKi.c[n]; |
| 1088 | ++n; | ||
| 1089 | } | ||
| 1038 | } | 1090 | } |
| 1039 | } | ||
| 1040 | 1091 | ||
| 1041 | ctx->mres = n; | 1092 | ctx->mres = n; |
| 1042 | return 0; | 1093 | return 0; |
| 1043 | } while(0); | 1094 | } while (0); |
| 1044 | #endif | 1095 | #endif |
| 1045 | for (i=0;i<len;++i) { | 1096 | for (i = 0; i < len; ++i) { |
| 1046 | if (n==0) { | 1097 | if (n == 0) { |
| 1047 | (*block)(ctx->Yi.c,ctx->EKi.c,key); | 1098 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
| 1048 | ++ctr; | 1099 | ++ctr; |
| 1049 | #if BYTE_ORDER == LITTLE_ENDIAN | 1100 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1050 | #ifdef BSWAP4 | 1101 | #ifdef BSWAP4 |
| 1051 | ctx->Yi.d[3] = BSWAP4(ctr); | 1102 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1052 | #else | 1103 | #else |
| 1053 | PUTU32(ctx->Yi.c+12,ctr); | 1104 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1054 | #endif | 1105 | #endif |
| 1055 | #else /* BIG_ENDIAN */ | 1106 | #else /* BIG_ENDIAN */ |
| 1056 | ctx->Yi.d[3] = ctr; | 1107 | ctx->Yi.d[3] = ctr; |
| 1057 | #endif | 1108 | #endif |
| 1058 | } | 1109 | } |
| 1059 | ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n]; | 1110 | ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; |
| 1060 | n = (n+1)%16; | 1111 | n = (n + 1) % 16; |
| 1061 | if (n==0) | 1112 | if (n == 0) |
| 1062 | GCM_MUL(ctx,Xi); | 1113 | GCM_MUL(ctx, Xi); |
| 1063 | } | 1114 | } |
| 1064 | 1115 | ||
| 1065 | ctx->mres = n; | 1116 | ctx->mres = n; |
| 1066 | return 0; | 1117 | return 0; |
| 1067 | } | 1118 | } |
| 1068 | 1119 | ||
| 1069 | int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, | 1120 | int |
| 1070 | const unsigned char *in, unsigned char *out, | 1121 | CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, |
| 1071 | size_t len) | 1122 | const unsigned char *in, unsigned char *out, |
| 1123 | size_t len) | ||
| 1072 | { | 1124 | { |
| 1073 | unsigned int n, ctr; | 1125 | unsigned int n, ctr; |
| 1074 | size_t i; | 1126 | size_t i; |
| 1075 | u64 mlen = ctx->len.u[1]; | 1127 | u64 mlen = ctx->len.u[1]; |
| 1076 | block128_f block = ctx->block; | 1128 | block128_f block = ctx->block; |
| 1077 | void *key = ctx->key; | 1129 | void *key = ctx->key; |
| 1078 | #ifdef GCM_FUNCREF_4BIT | 1130 | #ifdef GCM_FUNCREF_4BIT |
| 1079 | void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; | 1131 | void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult; |
| 1080 | # ifdef GHASH | 1132 | # ifdef GHASH |
| 1081 | void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], | 1133 | void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16], |
| 1082 | const u8 *inp,size_t len) = ctx->ghash; | 1134 | const u8 *inp, size_t len) = ctx->ghash; |
| 1083 | # endif | 1135 | # endif |
| 1084 | #endif | 1136 | #endif |
| 1085 | 1137 | ||
| 1086 | mlen += len; | 1138 | mlen += len; |
| 1087 | if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len)) | 1139 | if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) |
| 1088 | return -1; | 1140 | return -1; |
| 1089 | ctx->len.u[1] = mlen; | 1141 | ctx->len.u[1] = mlen; |
| 1090 | 1142 | ||
| 1091 | if (ctx->ares) { | 1143 | if (ctx->ares) { |
| 1092 | /* First call to decrypt finalizes GHASH(AAD) */ | 1144 | /* First call to decrypt finalizes GHASH(AAD) */ |
| 1093 | GCM_MUL(ctx,Xi); | 1145 | GCM_MUL(ctx, Xi); |
| 1094 | ctx->ares = 0; | 1146 | ctx->ares = 0; |
| 1095 | } | 1147 | } |
| 1096 | 1148 | ||
| @@ -1098,7 +1150,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, | |||
| 1098 | #ifdef BSWAP4 | 1150 | #ifdef BSWAP4 |
| 1099 | ctr = BSWAP4(ctx->Yi.d[3]); | 1151 | ctr = BSWAP4(ctx->Yi.d[3]); |
| 1100 | #else | 1152 | #else |
| 1101 | ctr = GETU32(ctx->Yi.c+12); | 1153 | ctr = GETU32(ctx->Yi.c + 12); |
| 1102 | #endif | 1154 | #endif |
| 1103 | #else /* BIG_ENDIAN */ | 1155 | #else /* BIG_ENDIAN */ |
| 1104 | ctr = ctx->Yi.d[3]; | 1156 | ctr = ctx->Yi.d[3]; |
| @@ -1106,179 +1158,184 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, | |||
| 1106 | 1158 | ||
| 1107 | n = ctx->mres; | 1159 | n = ctx->mres; |
| 1108 | #if !defined(OPENSSL_SMALL_FOOTPRINT) | 1160 | #if !defined(OPENSSL_SMALL_FOOTPRINT) |
| 1109 | if (16%sizeof(size_t) == 0) do { /* always true actually */ | 1161 | if (16 % sizeof(size_t) == 0) |
| 1110 | if (n) { | 1162 | do { /* always true actually */ |
| 1111 | while (n && len) { | 1163 | if (n) { |
| 1112 | u8 c = *(in++); | 1164 | while (n && len) { |
| 1113 | *(out++) = c^ctx->EKi.c[n]; | 1165 | u8 c = *(in++); |
| 1114 | ctx->Xi.c[n] ^= c; | 1166 | *(out++) = c ^ ctx->EKi.c[n]; |
| 1115 | --len; | 1167 | ctx->Xi.c[n] ^= c; |
| 1116 | n = (n+1)%16; | 1168 | --len; |
| 1117 | } | 1169 | n = (n + 1) % 16; |
| 1118 | if (n==0) GCM_MUL (ctx,Xi); | 1170 | } |
| 1119 | else { | 1171 | if (n == 0) |
| 1120 | ctx->mres = n; | 1172 | GCM_MUL(ctx, Xi); |
| 1121 | return 0; | 1173 | else { |
| 1174 | ctx->mres = n; | ||
| 1175 | return 0; | ||
| 1176 | } | ||
| 1122 | } | 1177 | } |
| 1123 | } | ||
| 1124 | #ifdef __STRICT_ALIGNMENT | 1178 | #ifdef __STRICT_ALIGNMENT |
| 1125 | if (((size_t)in|(size_t)out)%sizeof(size_t) != 0) | 1179 | if (((size_t)in|(size_t)out) % sizeof(size_t) != 0) |
| 1126 | break; | 1180 | break; |
| 1127 | #endif | 1181 | #endif |
| 1128 | #if defined(GHASH) && defined(GHASH_CHUNK) | 1182 | #if defined(GHASH) && defined(GHASH_CHUNK) |
| 1129 | while (len>=GHASH_CHUNK) { | 1183 | while (len >= GHASH_CHUNK) { |
| 1130 | size_t j=GHASH_CHUNK; | 1184 | size_t j = GHASH_CHUNK; |
| 1131 | 1185 | ||
| 1132 | GHASH(ctx,in,GHASH_CHUNK); | 1186 | GHASH(ctx, in, GHASH_CHUNK); |
| 1133 | while (j) { | 1187 | while (j) { |
| 1134 | size_t *out_t=(size_t *)out; | 1188 | size_t *out_t = (size_t *)out; |
| 1135 | const size_t *in_t=(const size_t *)in; | 1189 | const size_t *in_t = (const size_t *)in; |
| 1136 | 1190 | ||
| 1137 | (*block)(ctx->Yi.c,ctx->EKi.c,key); | 1191 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
| 1138 | ++ctr; | 1192 | ++ctr; |
| 1139 | #if BYTE_ORDER == LITTLE_ENDIAN | 1193 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1140 | #ifdef BSWAP4 | 1194 | #ifdef BSWAP4 |
| 1141 | ctx->Yi.d[3] = BSWAP4(ctr); | 1195 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1142 | #else | 1196 | #else |
| 1143 | PUTU32(ctx->Yi.c+12,ctr); | 1197 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1144 | #endif | 1198 | #endif |
| 1145 | #else /* BIG_ENDIAN */ | 1199 | #else /* BIG_ENDIAN */ |
| 1146 | ctx->Yi.d[3] = ctr; | 1200 | ctx->Yi.d[3] = ctr; |
| 1147 | #endif | 1201 | #endif |
| 1148 | for (i=0; i<16/sizeof(size_t); ++i) | 1202 | for (i = 0; i < 16/sizeof(size_t); ++i) |
| 1149 | out_t[i] = in_t[i]^ctx->EKi.t[i]; | 1203 | out_t[i] = in_t[i] ^ |
| 1150 | out += 16; | 1204 | ctx->EKi.t[i]; |
| 1151 | in += 16; | 1205 | out += 16; |
| 1152 | j -= 16; | 1206 | in += 16; |
| 1153 | } | 1207 | j -= 16; |
| 1154 | len -= GHASH_CHUNK; | 1208 | } |
| 1155 | } | 1209 | len -= GHASH_CHUNK; |
| 1156 | if ((i = (len&(size_t)-16))) { | 1210 | } |
| 1157 | GHASH(ctx,in,i); | 1211 | if ((i = (len & (size_t)-16))) { |
| 1158 | while (len>=16) { | 1212 | GHASH(ctx, in, i); |
| 1159 | size_t *out_t=(size_t *)out; | 1213 | while (len >= 16) { |
| 1160 | const size_t *in_t=(const size_t *)in; | 1214 | size_t *out_t = (size_t *)out; |
| 1161 | 1215 | const size_t *in_t = (const size_t *)in; | |
| 1162 | (*block)(ctx->Yi.c,ctx->EKi.c,key); | 1216 | |
| 1163 | ++ctr; | 1217 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
| 1218 | ++ctr; | ||
| 1164 | #if BYTE_ORDER == LITTLE_ENDIAN | 1219 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1165 | #ifdef BSWAP4 | 1220 | #ifdef BSWAP4 |
| 1166 | ctx->Yi.d[3] = BSWAP4(ctr); | 1221 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1167 | #else | 1222 | #else |
| 1168 | PUTU32(ctx->Yi.c+12,ctr); | 1223 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1169 | #endif | 1224 | #endif |
| 1170 | #else /* BIG_ENDIAN */ | 1225 | #else /* BIG_ENDIAN */ |
| 1171 | ctx->Yi.d[3] = ctr; | 1226 | ctx->Yi.d[3] = ctr; |
| 1172 | #endif | 1227 | #endif |
| 1173 | for (i=0; i<16/sizeof(size_t); ++i) | 1228 | for (i = 0; i < 16/sizeof(size_t); ++i) |
| 1174 | out_t[i] = in_t[i]^ctx->EKi.t[i]; | 1229 | out_t[i] = in_t[i] ^ |
| 1175 | out += 16; | 1230 | ctx->EKi.t[i]; |
| 1176 | in += 16; | 1231 | out += 16; |
| 1177 | len -= 16; | 1232 | in += 16; |
| 1178 | } | 1233 | len -= 16; |
| 1179 | } | 1234 | } |
| 1235 | } | ||
| 1180 | #else | 1236 | #else |
| 1181 | while (len>=16) { | 1237 | while (len >= 16) { |
| 1182 | size_t *out_t=(size_t *)out; | 1238 | size_t *out_t = (size_t *)out; |
| 1183 | const size_t *in_t=(const size_t *)in; | 1239 | const size_t *in_t = (const size_t *)in; |
| 1184 | 1240 | ||
| 1185 | (*block)(ctx->Yi.c,ctx->EKi.c,key); | 1241 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
| 1186 | ++ctr; | 1242 | ++ctr; |
| 1187 | #if BYTE_ORDER == LITTLE_ENDIAN | 1243 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1188 | #ifdef BSWAP4 | 1244 | #ifdef BSWAP4 |
| 1189 | ctx->Yi.d[3] = BSWAP4(ctr); | 1245 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1190 | #else | 1246 | #else |
| 1191 | PUTU32(ctx->Yi.c+12,ctr); | 1247 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1192 | #endif | 1248 | #endif |
| 1193 | #else /* BIG_ENDIAN */ | 1249 | #else /* BIG_ENDIAN */ |
| 1194 | ctx->Yi.d[3] = ctr; | 1250 | ctx->Yi.d[3] = ctr; |
| 1195 | #endif | 1251 | #endif |
| 1196 | for (i=0; i<16/sizeof(size_t); ++i) { | 1252 | for (i = 0; i < 16/sizeof(size_t); ++i) { |
| 1197 | size_t c = in[i]; | 1253 | size_t c = in[i]; |
| 1198 | out[i] = c^ctx->EKi.t[i]; | 1254 | out[i] = c ^ ctx->EKi.t[i]; |
| 1199 | ctx->Xi.t[i] ^= c; | 1255 | ctx->Xi.t[i] ^= c; |
| 1256 | } | ||
| 1257 | GCM_MUL(ctx, Xi); | ||
| 1258 | out += 16; | ||
| 1259 | in += 16; | ||
| 1260 | len -= 16; | ||
| 1200 | } | 1261 | } |
| 1201 | GCM_MUL(ctx,Xi); | ||
| 1202 | out += 16; | ||
| 1203 | in += 16; | ||
| 1204 | len -= 16; | ||
| 1205 | } | ||
| 1206 | #endif | 1262 | #endif |
| 1207 | if (len) { | 1263 | if (len) { |
| 1208 | (*block)(ctx->Yi.c,ctx->EKi.c,key); | 1264 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
| 1209 | ++ctr; | 1265 | ++ctr; |
| 1210 | #if BYTE_ORDER == LITTLE_ENDIAN | 1266 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1211 | #ifdef BSWAP4 | 1267 | #ifdef BSWAP4 |
| 1212 | ctx->Yi.d[3] = BSWAP4(ctr); | 1268 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1213 | #else | 1269 | #else |
| 1214 | PUTU32(ctx->Yi.c+12,ctr); | 1270 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1215 | #endif | 1271 | #endif |
| 1216 | #else /* BIG_ENDIAN */ | 1272 | #else /* BIG_ENDIAN */ |
| 1217 | ctx->Yi.d[3] = ctr; | 1273 | ctx->Yi.d[3] = ctr; |
| 1218 | #endif | 1274 | #endif |
| 1219 | while (len--) { | 1275 | while (len--) { |
| 1220 | u8 c = in[n]; | 1276 | u8 c = in[n]; |
| 1221 | ctx->Xi.c[n] ^= c; | 1277 | ctx->Xi.c[n] ^= c; |
| 1222 | out[n] = c^ctx->EKi.c[n]; | 1278 | out[n] = c ^ ctx->EKi.c[n]; |
| 1223 | ++n; | 1279 | ++n; |
| 1280 | } | ||
| 1224 | } | 1281 | } |
| 1225 | } | ||
| 1226 | 1282 | ||
| 1227 | ctx->mres = n; | 1283 | ctx->mres = n; |
| 1228 | return 0; | 1284 | return 0; |
| 1229 | } while(0); | 1285 | } while (0); |
| 1230 | #endif | 1286 | #endif |
| 1231 | for (i=0;i<len;++i) { | 1287 | for (i = 0; i < len; ++i) { |
| 1232 | u8 c; | 1288 | u8 c; |
| 1233 | if (n==0) { | 1289 | if (n == 0) { |
| 1234 | (*block)(ctx->Yi.c,ctx->EKi.c,key); | 1290 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
| 1235 | ++ctr; | 1291 | ++ctr; |
| 1236 | #if BYTE_ORDER == LITTLE_ENDIAN | 1292 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1237 | #ifdef BSWAP4 | 1293 | #ifdef BSWAP4 |
| 1238 | ctx->Yi.d[3] = BSWAP4(ctr); | 1294 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1239 | #else | 1295 | #else |
| 1240 | PUTU32(ctx->Yi.c+12,ctr); | 1296 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1241 | #endif | 1297 | #endif |
| 1242 | #else /* BIG_ENDIAN */ | 1298 | #else /* BIG_ENDIAN */ |
| 1243 | ctx->Yi.d[3] = ctr; | 1299 | ctx->Yi.d[3] = ctr; |
| 1244 | #endif | 1300 | #endif |
| 1245 | } | 1301 | } |
| 1246 | c = in[i]; | 1302 | c = in[i]; |
| 1247 | out[i] = c^ctx->EKi.c[n]; | 1303 | out[i] = c ^ ctx->EKi.c[n]; |
| 1248 | ctx->Xi.c[n] ^= c; | 1304 | ctx->Xi.c[n] ^= c; |
| 1249 | n = (n+1)%16; | 1305 | n = (n + 1) % 16; |
| 1250 | if (n==0) | 1306 | if (n == 0) |
| 1251 | GCM_MUL(ctx,Xi); | 1307 | GCM_MUL(ctx, Xi); |
| 1252 | } | 1308 | } |
| 1253 | 1309 | ||
| 1254 | ctx->mres = n; | 1310 | ctx->mres = n; |
| 1255 | return 0; | 1311 | return 0; |
| 1256 | } | 1312 | } |
| 1257 | 1313 | ||
| 1258 | int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, | 1314 | int |
| 1259 | const unsigned char *in, unsigned char *out, | 1315 | CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, |
| 1260 | size_t len, ctr128_f stream) | 1316 | const unsigned char *in, unsigned char *out, |
| 1317 | size_t len, ctr128_f stream) | ||
| 1261 | { | 1318 | { |
| 1262 | unsigned int n, ctr; | 1319 | unsigned int n, ctr; |
| 1263 | size_t i; | 1320 | size_t i; |
| 1264 | u64 mlen = ctx->len.u[1]; | 1321 | u64 mlen = ctx->len.u[1]; |
| 1265 | void *key = ctx->key; | 1322 | void *key = ctx->key; |
| 1266 | #ifdef GCM_FUNCREF_4BIT | 1323 | #ifdef GCM_FUNCREF_4BIT |
| 1267 | void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; | 1324 | void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult; |
| 1268 | # ifdef GHASH | 1325 | # ifdef GHASH |
| 1269 | void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], | 1326 | void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16], |
| 1270 | const u8 *inp,size_t len) = ctx->ghash; | 1327 | const u8 *inp, size_t len) = ctx->ghash; |
| 1271 | # endif | 1328 | # endif |
| 1272 | #endif | 1329 | #endif |
| 1273 | 1330 | ||
| 1274 | mlen += len; | 1331 | mlen += len; |
| 1275 | if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len)) | 1332 | if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) |
| 1276 | return -1; | 1333 | return -1; |
| 1277 | ctx->len.u[1] = mlen; | 1334 | ctx->len.u[1] = mlen; |
| 1278 | 1335 | ||
| 1279 | if (ctx->ares) { | 1336 | if (ctx->ares) { |
| 1280 | /* First call to encrypt finalizes GHASH(AAD) */ | 1337 | /* First call to encrypt finalizes GHASH(AAD) */ |
| 1281 | GCM_MUL(ctx,Xi); | 1338 | GCM_MUL(ctx, Xi); |
| 1282 | ctx->ares = 0; | 1339 | ctx->ares = 0; |
| 1283 | } | 1340 | } |
| 1284 | 1341 | ||
| @@ -1286,7 +1343,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, | |||
| 1286 | #ifdef BSWAP4 | 1343 | #ifdef BSWAP4 |
| 1287 | ctr = BSWAP4(ctx->Yi.d[3]); | 1344 | ctr = BSWAP4(ctx->Yi.d[3]); |
| 1288 | #else | 1345 | #else |
| 1289 | ctr = GETU32(ctx->Yi.c+12); | 1346 | ctr = GETU32(ctx->Yi.c + 12); |
| 1290 | #endif | 1347 | #endif |
| 1291 | #else /* BIG_ENDIAN */ | 1348 | #else /* BIG_ENDIAN */ |
| 1292 | ctr = ctx->Yi.d[3]; | 1349 | ctr = ctx->Yi.d[3]; |
| @@ -1295,76 +1352,78 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, | |||
| 1295 | n = ctx->mres; | 1352 | n = ctx->mres; |
| 1296 | if (n) { | 1353 | if (n) { |
| 1297 | while (n && len) { | 1354 | while (n && len) { |
| 1298 | ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n]; | 1355 | ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; |
| 1299 | --len; | 1356 | --len; |
| 1300 | n = (n+1)%16; | 1357 | n = (n + 1) % 16; |
| 1301 | } | 1358 | } |
| 1302 | if (n==0) GCM_MUL(ctx,Xi); | 1359 | if (n == 0) |
| 1360 | GCM_MUL(ctx, Xi); | ||
| 1303 | else { | 1361 | else { |
| 1304 | ctx->mres = n; | 1362 | ctx->mres = n; |
| 1305 | return 0; | 1363 | return 0; |
| 1306 | } | 1364 | } |
| 1307 | } | 1365 | } |
| 1308 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 1366 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) |
| 1309 | while (len>=GHASH_CHUNK) { | 1367 | while (len >= GHASH_CHUNK) { |
| 1310 | (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c); | 1368 | (*stream)(in, out, GHASH_CHUNK/16, key, ctx->Yi.c); |
| 1311 | ctr += GHASH_CHUNK/16; | 1369 | ctr += GHASH_CHUNK/16; |
| 1312 | #if BYTE_ORDER == LITTLE_ENDIAN | 1370 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1313 | #ifdef BSWAP4 | 1371 | #ifdef BSWAP4 |
| 1314 | ctx->Yi.d[3] = BSWAP4(ctr); | 1372 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1315 | #else | 1373 | #else |
| 1316 | PUTU32(ctx->Yi.c+12,ctr); | 1374 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1317 | #endif | 1375 | #endif |
| 1318 | #else /* BIG_ENDIAN */ | 1376 | #else /* BIG_ENDIAN */ |
| 1319 | ctx->Yi.d[3] = ctr; | 1377 | ctx->Yi.d[3] = ctr; |
| 1320 | #endif | 1378 | #endif |
| 1321 | GHASH(ctx,out,GHASH_CHUNK); | 1379 | GHASH(ctx, out, GHASH_CHUNK); |
| 1322 | out += GHASH_CHUNK; | 1380 | out += GHASH_CHUNK; |
| 1323 | in += GHASH_CHUNK; | 1381 | in += GHASH_CHUNK; |
| 1324 | len -= GHASH_CHUNK; | 1382 | len -= GHASH_CHUNK; |
| 1325 | } | 1383 | } |
| 1326 | #endif | 1384 | #endif |
| 1327 | if ((i = (len&(size_t)-16))) { | 1385 | if ((i = (len & (size_t)-16))) { |
| 1328 | size_t j=i/16; | 1386 | size_t j = i/16; |
| 1329 | 1387 | ||
| 1330 | (*stream)(in,out,j,key,ctx->Yi.c); | 1388 | (*stream)(in, out, j, key, ctx->Yi.c); |
| 1331 | ctr += (unsigned int)j; | 1389 | ctr += (unsigned int)j; |
| 1332 | #if BYTE_ORDER == LITTLE_ENDIAN | 1390 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1333 | #ifdef BSWAP4 | 1391 | #ifdef BSWAP4 |
| 1334 | ctx->Yi.d[3] = BSWAP4(ctr); | 1392 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1335 | #else | 1393 | #else |
| 1336 | PUTU32(ctx->Yi.c+12,ctr); | 1394 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1337 | #endif | 1395 | #endif |
| 1338 | #else /* BIG_ENDIAN */ | 1396 | #else /* BIG_ENDIAN */ |
| 1339 | ctx->Yi.d[3] = ctr; | 1397 | ctx->Yi.d[3] = ctr; |
| 1340 | #endif | 1398 | #endif |
| 1341 | in += i; | 1399 | in += i; |
| 1342 | len -= i; | 1400 | len -= i; |
| 1343 | #if defined(GHASH) | 1401 | #if defined(GHASH) |
| 1344 | GHASH(ctx,out,i); | 1402 | GHASH(ctx, out, i); |
| 1345 | out += i; | 1403 | out += i; |
| 1346 | #else | 1404 | #else |
| 1347 | while (j--) { | 1405 | while (j--) { |
| 1348 | for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i]; | 1406 | for (i = 0; i < 16; ++i) |
| 1349 | GCM_MUL(ctx,Xi); | 1407 | ctx->Xi.c[i] ^= out[i]; |
| 1408 | GCM_MUL(ctx, Xi); | ||
| 1350 | out += 16; | 1409 | out += 16; |
| 1351 | } | 1410 | } |
| 1352 | #endif | 1411 | #endif |
| 1353 | } | 1412 | } |
| 1354 | if (len) { | 1413 | if (len) { |
| 1355 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key); | 1414 | (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key); |
| 1356 | ++ctr; | 1415 | ++ctr; |
| 1357 | #if BYTE_ORDER == LITTLE_ENDIAN | 1416 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1358 | #ifdef BSWAP4 | 1417 | #ifdef BSWAP4 |
| 1359 | ctx->Yi.d[3] = BSWAP4(ctr); | 1418 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1360 | #else | 1419 | #else |
| 1361 | PUTU32(ctx->Yi.c+12,ctr); | 1420 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1362 | #endif | 1421 | #endif |
| 1363 | #else /* BIG_ENDIAN */ | 1422 | #else /* BIG_ENDIAN */ |
| 1364 | ctx->Yi.d[3] = ctr; | 1423 | ctx->Yi.d[3] = ctr; |
| 1365 | #endif | 1424 | #endif |
| 1366 | while (len--) { | 1425 | while (len--) { |
| 1367 | ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n]; | 1426 | ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; |
| 1368 | ++n; | 1427 | ++n; |
| 1369 | } | 1428 | } |
| 1370 | } | 1429 | } |
| @@ -1373,30 +1432,31 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, | |||
| 1373 | return 0; | 1432 | return 0; |
| 1374 | } | 1433 | } |
| 1375 | 1434 | ||
| 1376 | int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, | 1435 | int |
| 1377 | const unsigned char *in, unsigned char *out, | 1436 | CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, |
| 1378 | size_t len,ctr128_f stream) | 1437 | const unsigned char *in, unsigned char *out, |
| 1438 | size_t len, ctr128_f stream) | ||
| 1379 | { | 1439 | { |
| 1380 | unsigned int n, ctr; | 1440 | unsigned int n, ctr; |
| 1381 | size_t i; | 1441 | size_t i; |
| 1382 | u64 mlen = ctx->len.u[1]; | 1442 | u64 mlen = ctx->len.u[1]; |
| 1383 | void *key = ctx->key; | 1443 | void *key = ctx->key; |
| 1384 | #ifdef GCM_FUNCREF_4BIT | 1444 | #ifdef GCM_FUNCREF_4BIT |
| 1385 | void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; | 1445 | void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult; |
| 1386 | # ifdef GHASH | 1446 | # ifdef GHASH |
| 1387 | void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], | 1447 | void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16], |
| 1388 | const u8 *inp,size_t len) = ctx->ghash; | 1448 | const u8 *inp, size_t len) = ctx->ghash; |
| 1389 | # endif | 1449 | # endif |
| 1390 | #endif | 1450 | #endif |
| 1391 | 1451 | ||
| 1392 | mlen += len; | 1452 | mlen += len; |
| 1393 | if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len)) | 1453 | if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) |
| 1394 | return -1; | 1454 | return -1; |
| 1395 | ctx->len.u[1] = mlen; | 1455 | ctx->len.u[1] = mlen; |
| 1396 | 1456 | ||
| 1397 | if (ctx->ares) { | 1457 | if (ctx->ares) { |
| 1398 | /* First call to decrypt finalizes GHASH(AAD) */ | 1458 | /* First call to decrypt finalizes GHASH(AAD) */ |
| 1399 | GCM_MUL(ctx,Xi); | 1459 | GCM_MUL(ctx, Xi); |
| 1400 | ctx->ares = 0; | 1460 | ctx->ares = 0; |
| 1401 | } | 1461 | } |
| 1402 | 1462 | ||
| @@ -1404,7 +1464,7 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, | |||
| 1404 | #ifdef BSWAP4 | 1464 | #ifdef BSWAP4 |
| 1405 | ctr = BSWAP4(ctx->Yi.d[3]); | 1465 | ctr = BSWAP4(ctx->Yi.d[3]); |
| 1406 | #else | 1466 | #else |
| 1407 | ctr = GETU32(ctx->Yi.c+12); | 1467 | ctr = GETU32(ctx->Yi.c + 12); |
| 1408 | #endif | 1468 | #endif |
| 1409 | #else /* BIG_ENDIAN */ | 1469 | #else /* BIG_ENDIAN */ |
| 1410 | ctr = ctx->Yi.d[3]; | 1470 | ctr = ctx->Yi.d[3]; |
| @@ -1414,74 +1474,76 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, | |||
| 1414 | if (n) { | 1474 | if (n) { |
| 1415 | while (n && len) { | 1475 | while (n && len) { |
| 1416 | u8 c = *(in++); | 1476 | u8 c = *(in++); |
| 1417 | *(out++) = c^ctx->EKi.c[n]; | 1477 | *(out++) = c ^ ctx->EKi.c[n]; |
| 1418 | ctx->Xi.c[n] ^= c; | 1478 | ctx->Xi.c[n] ^= c; |
| 1419 | --len; | 1479 | --len; |
| 1420 | n = (n+1)%16; | 1480 | n = (n + 1) % 16; |
| 1421 | } | 1481 | } |
| 1422 | if (n==0) GCM_MUL (ctx,Xi); | 1482 | if (n == 0) |
| 1483 | GCM_MUL(ctx, Xi); | ||
| 1423 | else { | 1484 | else { |
| 1424 | ctx->mres = n; | 1485 | ctx->mres = n; |
| 1425 | return 0; | 1486 | return 0; |
| 1426 | } | 1487 | } |
| 1427 | } | 1488 | } |
| 1428 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 1489 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) |
| 1429 | while (len>=GHASH_CHUNK) { | 1490 | while (len >= GHASH_CHUNK) { |
| 1430 | GHASH(ctx,in,GHASH_CHUNK); | 1491 | GHASH(ctx, in, GHASH_CHUNK); |
| 1431 | (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c); | 1492 | (*stream)(in, out, GHASH_CHUNK/16, key, ctx->Yi.c); |
| 1432 | ctr += GHASH_CHUNK/16; | 1493 | ctr += GHASH_CHUNK/16; |
| 1433 | #if BYTE_ORDER == LITTLE_ENDIAN | 1494 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1434 | #ifdef BSWAP4 | 1495 | #ifdef BSWAP4 |
| 1435 | ctx->Yi.d[3] = BSWAP4(ctr); | 1496 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1436 | #else | 1497 | #else |
| 1437 | PUTU32(ctx->Yi.c+12,ctr); | 1498 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1438 | #endif | 1499 | #endif |
| 1439 | #else /* BIG_ENDIAN */ | 1500 | #else /* BIG_ENDIAN */ |
| 1440 | ctx->Yi.d[3] = ctr; | 1501 | ctx->Yi.d[3] = ctr; |
| 1441 | #endif | 1502 | #endif |
| 1442 | out += GHASH_CHUNK; | 1503 | out += GHASH_CHUNK; |
| 1443 | in += GHASH_CHUNK; | 1504 | in += GHASH_CHUNK; |
| 1444 | len -= GHASH_CHUNK; | 1505 | len -= GHASH_CHUNK; |
| 1445 | } | 1506 | } |
| 1446 | #endif | 1507 | #endif |
| 1447 | if ((i = (len&(size_t)-16))) { | 1508 | if ((i = (len & (size_t)-16))) { |
| 1448 | size_t j=i/16; | 1509 | size_t j = i/16; |
| 1449 | 1510 | ||
| 1450 | #if defined(GHASH) | 1511 | #if defined(GHASH) |
| 1451 | GHASH(ctx,in,i); | 1512 | GHASH(ctx, in, i); |
| 1452 | #else | 1513 | #else |
| 1453 | while (j--) { | 1514 | while (j--) { |
| 1454 | size_t k; | 1515 | size_t k; |
| 1455 | for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k]; | 1516 | for (k = 0; k < 16; ++k) |
| 1456 | GCM_MUL(ctx,Xi); | 1517 | ctx->Xi.c[k] ^= in[k]; |
| 1518 | GCM_MUL(ctx, Xi); | ||
| 1457 | in += 16; | 1519 | in += 16; |
| 1458 | } | 1520 | } |
| 1459 | j = i/16; | 1521 | j = i/16; |
| 1460 | in -= i; | 1522 | in -= i; |
| 1461 | #endif | 1523 | #endif |
| 1462 | (*stream)(in,out,j,key,ctx->Yi.c); | 1524 | (*stream)(in, out, j, key, ctx->Yi.c); |
| 1463 | ctr += (unsigned int)j; | 1525 | ctr += (unsigned int)j; |
| 1464 | #if BYTE_ORDER == LITTLE_ENDIAN | 1526 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1465 | #ifdef BSWAP4 | 1527 | #ifdef BSWAP4 |
| 1466 | ctx->Yi.d[3] = BSWAP4(ctr); | 1528 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1467 | #else | 1529 | #else |
| 1468 | PUTU32(ctx->Yi.c+12,ctr); | 1530 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1469 | #endif | 1531 | #endif |
| 1470 | #else /* BIG_ENDIAN */ | 1532 | #else /* BIG_ENDIAN */ |
| 1471 | ctx->Yi.d[3] = ctr; | 1533 | ctx->Yi.d[3] = ctr; |
| 1472 | #endif | 1534 | #endif |
| 1473 | out += i; | 1535 | out += i; |
| 1474 | in += i; | 1536 | in += i; |
| 1475 | len -= i; | 1537 | len -= i; |
| 1476 | } | 1538 | } |
| 1477 | if (len) { | 1539 | if (len) { |
| 1478 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key); | 1540 | (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key); |
| 1479 | ++ctr; | 1541 | ++ctr; |
| 1480 | #if BYTE_ORDER == LITTLE_ENDIAN | 1542 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1481 | #ifdef BSWAP4 | 1543 | #ifdef BSWAP4 |
| 1482 | ctx->Yi.d[3] = BSWAP4(ctr); | 1544 | ctx->Yi.d[3] = BSWAP4(ctr); |
| 1483 | #else | 1545 | #else |
| 1484 | PUTU32(ctx->Yi.c+12,ctr); | 1546 | PUTU32(ctx->Yi.c + 12, ctr); |
| 1485 | #endif | 1547 | #endif |
| 1486 | #else /* BIG_ENDIAN */ | 1548 | #else /* BIG_ENDIAN */ |
| 1487 | ctx->Yi.d[3] = ctr; | 1549 | ctx->Yi.d[3] = ctr; |
| @@ -1489,7 +1551,7 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, | |||
| 1489 | while (len--) { | 1551 | while (len--) { |
| 1490 | u8 c = in[n]; | 1552 | u8 c = in[n]; |
| 1491 | ctx->Xi.c[n] ^= c; | 1553 | ctx->Xi.c[n] ^= c; |
| 1492 | out[n] = c^ctx->EKi.c[n]; | 1554 | out[n] = c ^ ctx->EKi.c[n]; |
| 1493 | ++n; | 1555 | ++n; |
| 1494 | } | 1556 | } |
| 1495 | } | 1557 | } |
| @@ -1498,17 +1560,18 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, | |||
| 1498 | return 0; | 1560 | return 0; |
| 1499 | } | 1561 | } |
| 1500 | 1562 | ||
| 1501 | int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag, | 1563 | int |
| 1502 | size_t len) | 1564 | CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, |
| 1565 | size_t len) | ||
| 1503 | { | 1566 | { |
| 1504 | u64 alen = ctx->len.u[0]<<3; | 1567 | u64 alen = ctx->len.u[0] << 3; |
| 1505 | u64 clen = ctx->len.u[1]<<3; | 1568 | u64 clen = ctx->len.u[1] << 3; |
| 1506 | #ifdef GCM_FUNCREF_4BIT | 1569 | #ifdef GCM_FUNCREF_4BIT |
| 1507 | void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; | 1570 | void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult; |
| 1508 | #endif | 1571 | #endif |
| 1509 | 1572 | ||
| 1510 | if (ctx->mres || ctx->ares) | 1573 | if (ctx->mres || ctx->ares) |
| 1511 | GCM_MUL(ctx,Xi); | 1574 | GCM_MUL(ctx, Xi); |
| 1512 | 1575 | ||
| 1513 | #if BYTE_ORDER == LITTLE_ENDIAN | 1576 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 1514 | #ifdef BSWAP8 | 1577 | #ifdef BSWAP8 |
| @@ -1521,42 +1584,46 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag, | |||
| 1521 | ctx->len.u[0] = alen; | 1584 | ctx->len.u[0] = alen; |
| 1522 | ctx->len.u[1] = clen; | 1585 | ctx->len.u[1] = clen; |
| 1523 | 1586 | ||
| 1524 | alen = (u64)GETU32(p) <<32|GETU32(p+4); | 1587 | alen = (u64)GETU32(p) << 32|GETU32(p + 4); |
| 1525 | clen = (u64)GETU32(p+8)<<32|GETU32(p+12); | 1588 | clen = (u64)GETU32(p + 8) << 32|GETU32(p + 12); |
| 1526 | } | 1589 | } |
| 1527 | #endif | 1590 | #endif |
| 1528 | #endif | 1591 | #endif |
| 1529 | 1592 | ||
| 1530 | ctx->Xi.u[0] ^= alen; | 1593 | ctx->Xi.u[0] ^= alen; |
| 1531 | ctx->Xi.u[1] ^= clen; | 1594 | ctx->Xi.u[1] ^= clen; |
| 1532 | GCM_MUL(ctx,Xi); | 1595 | GCM_MUL(ctx, Xi); |
| 1533 | 1596 | ||
| 1534 | ctx->Xi.u[0] ^= ctx->EK0.u[0]; | 1597 | ctx->Xi.u[0] ^= ctx->EK0.u[0]; |
| 1535 | ctx->Xi.u[1] ^= ctx->EK0.u[1]; | 1598 | ctx->Xi.u[1] ^= ctx->EK0.u[1]; |
| 1536 | 1599 | ||
| 1537 | if (tag && len<=sizeof(ctx->Xi)) | 1600 | if (tag && len <= sizeof(ctx->Xi)) |
| 1538 | return memcmp(ctx->Xi.c,tag,len); | 1601 | return memcmp(ctx->Xi.c, tag, len); |
| 1539 | else | 1602 | else |
| 1540 | return -1; | 1603 | return -1; |
| 1541 | } | 1604 | } |
| 1542 | 1605 | ||
| 1543 | void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) | 1606 | void |
| 1607 | CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) | ||
| 1544 | { | 1608 | { |
| 1545 | CRYPTO_gcm128_finish(ctx, NULL, 0); | 1609 | CRYPTO_gcm128_finish(ctx, NULL, 0); |
| 1546 | memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c)); | 1610 | memcpy(tag, ctx->Xi.c, |
| 1611 | len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c)); | ||
| 1547 | } | 1612 | } |
| 1548 | 1613 | ||
| 1549 | GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) | 1614 | GCM128_CONTEXT * |
| 1615 | CRYPTO_gcm128_new(void *key, block128_f block) | ||
| 1550 | { | 1616 | { |
| 1551 | GCM128_CONTEXT *ret; | 1617 | GCM128_CONTEXT *ret; |
| 1552 | 1618 | ||
| 1553 | if ((ret = malloc(sizeof(GCM128_CONTEXT)))) | 1619 | if ((ret = malloc(sizeof(GCM128_CONTEXT)))) |
| 1554 | CRYPTO_gcm128_init(ret,key,block); | 1620 | CRYPTO_gcm128_init(ret, key, block); |
| 1555 | 1621 | ||
| 1556 | return ret; | 1622 | return ret; |
| 1557 | } | 1623 | } |
| 1558 | 1624 | ||
| 1559 | void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) | 1625 | void |
| 1626 | CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) | ||
| 1560 | { | 1627 | { |
| 1561 | freezero(ctx, sizeof(*ctx)); | 1628 | freezero(ctx, sizeof(*ctx)); |
| 1562 | } | 1629 | } |
