Diffstat (limited to 'src/lib/libcrypto/modes/gcm128.c')
-rw-r--r--  src/lib/libcrypto/modes/gcm128.c | 1539
1 file changed, 0 insertions(+), 1539 deletions(-)
diff --git a/src/lib/libcrypto/modes/gcm128.c b/src/lib/libcrypto/modes/gcm128.c
deleted file mode 100644
index dd6d91e880..0000000000
--- a/src/lib/libcrypto/modes/gcm128.c
+++ /dev/null
@@ -1,1539 +0,0 @@
1/* $OpenBSD: gcm128.c,v 1.13 2015/09/10 15:56:25 jsing Exp $ */
2/* ====================================================================
3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 */
50
51#define OPENSSL_FIPSAPI
52
53#include <openssl/crypto.h>
54#include "modes_lcl.h"
55#include <string.h>
56
57#ifndef MODES_DEBUG
58# ifndef NDEBUG
59# define NDEBUG
60# endif
61#endif
62
63#if defined(BSWAP4) && defined(__STRICT_ALIGNMENT)
64/* redefine, because alignment is ensured */
65#undef GETU32
66#define GETU32(p) BSWAP4(*(const u32 *)(p))
67#undef PUTU32
68#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
69#endif
70
71#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
72#define REDUCE1BIT(V) \
73 do { \
74 if (sizeof(size_t)==8) { \
75 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
76 V.lo = (V.hi<<63)|(V.lo>>1); \
77 V.hi = (V.hi>>1 )^T; \
78 } else { \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
82 } \
83 } while(0)
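/*
 * [Editorial note, not part of the original file] REDUCE1BIT shifts the
 * 128-bit value V (hi:lo) right by one bit and, if the bit shifted out of
 * V.lo was set, XORs 0xe1 into the top byte of V.hi.  In GHASH's reflected
 * bit order this is multiplication by x in GF(2^128) modulo the GCM
 * polynomial x^128 + x^7 + x^2 + x + 1, whose reduction constant is
 * R = 0xe1000000000000000000000000000000.
 */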
84
85/*
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
 88 * TABLE_BITS>1 selects lookup-table-driven implementations, referred to
 89 * as "Shoup's" in the GCM specification. In other words OpenSSL does not
 90 * cover the whole spectrum of possible table-driven implementations. Why?
 91 * In the non-"Shoup's" case the memory access pattern is segmented in such
 92 * a manner that it's trivial to see that cache-timing information can
 93 * reveal a fair portion of the intermediate hash value. Given that the
 94 * ciphertext is always available to an attacker, it's possible to attempt
 95 * to deduce the secret parameter H and, if successful, tamper with
 96 * messages [which is trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. On the plus side, it should be twice as fast as the
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
106 *
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows a large enough free()
111 * results in VM working set trimming, meaning that a subsequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
116 *
117 * Value of 1 is not appropriate for performance reasons.
118 */
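/*
 * [Editorial note, not part of the original file] The memory figures quoted
 * above work out as follows (u128 being a pair of u64, i.e. 16 bytes per
 * table entry):
 *
 *   TABLE_BITS==4:  u128 Htable[16]   ->  16 * 16 =  256 bytes per key
 *   TABLE_BITS==8:  u128 Htable[256]  -> 256 * 16 = 4096 bytes per key
 *
 * hence "16 (sixteen) times more memory, 4KB per individual key"; the
 * shared rem_8bit[256] table of size_t entries is the "+ 1KB shared" part
 * (2KB when size_t is 8 bytes).
 */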
119#if TABLE_BITS==8
120
121static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122{
123 int i, j;
124 u128 V;
125
126 Htable[0].hi = 0;
127 Htable[0].lo = 0;
128 V.hi = H[0];
129 V.lo = H[1];
130
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
132 REDUCE1BIT(V);
133 Htable[i] = V;
134 }
135
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
141 }
142 }
143}
144
145static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146{
147 u128 Z = { 0, 0};
148 const u8 *xi = (const u8 *)Xi+15;
149 size_t rem, n = *xi;
150 static const size_t rem_8bit[256] = {
151 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
152 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
153 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
154 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
155 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
156 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
157 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
158 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
159 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
160 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
161 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
162 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
163 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
164 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
165 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
166 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
167 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
168 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
169 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
170 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
171 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
172 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
173 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
174 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
175 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
176 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
177 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
178 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
179 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
180 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
181 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
182 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
183 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
184 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
185 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
186 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
187 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
188 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
189 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
190 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
191 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
192 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
193 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
194 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
195 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
196 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
197 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
198 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
199 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
200 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
201 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
202 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
203 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
204 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
205 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
206 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
207 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
208 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
209 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
210 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
211 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
212 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
213 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
214 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
215
216 while (1) {
217 Z.hi ^= Htable[n].hi;
218 Z.lo ^= Htable[n].lo;
219
220 if ((u8 *)Xi==xi) break;
221
222 n = *(--xi);
223
224 rem = (size_t)Z.lo&0xff;
225 Z.lo = (Z.hi<<56)|(Z.lo>>8);
226 Z.hi = (Z.hi>>8);
227 if (sizeof(size_t)==8)
228 Z.hi ^= rem_8bit[rem];
229 else
230 Z.hi ^= (u64)rem_8bit[rem]<<32;
231 }
232
233 if (BYTE_ORDER == LITTLE_ENDIAN) {
234#ifdef BSWAP8
235 Xi[0] = BSWAP8(Z.hi);
236 Xi[1] = BSWAP8(Z.lo);
237#else
238 u8 *p = (u8 *)Xi;
239 u32 v;
240 v = (u32)(Z.hi>>32); PUTU32(p,v);
241 v = (u32)(Z.hi); PUTU32(p+4,v);
242 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
243 v = (u32)(Z.lo); PUTU32(p+12,v);
244#endif
245 }
246 else {
247 Xi[0] = Z.hi;
248 Xi[1] = Z.lo;
249 }
250}
251#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
252
253#elif TABLE_BITS==4
254
255static void gcm_init_4bit(u128 Htable[16], u64 H[2])
256{
257 u128 V;
258#if defined(OPENSSL_SMALL_FOOTPRINT)
259 int i;
260#endif
261
262 Htable[0].hi = 0;
263 Htable[0].lo = 0;
264 V.hi = H[0];
265 V.lo = H[1];
266
267#if defined(OPENSSL_SMALL_FOOTPRINT)
268 for (Htable[8]=V, i=4; i>0; i>>=1) {
269 REDUCE1BIT(V);
270 Htable[i] = V;
271 }
272
273 for (i=2; i<16; i<<=1) {
274 u128 *Hi = Htable+i;
275 int j;
276 for (V=*Hi, j=1; j<i; ++j) {
277 Hi[j].hi = V.hi^Htable[j].hi;
278 Hi[j].lo = V.lo^Htable[j].lo;
279 }
280 }
281#else
282 Htable[8] = V;
283 REDUCE1BIT(V);
284 Htable[4] = V;
285 REDUCE1BIT(V);
286 Htable[2] = V;
287 REDUCE1BIT(V);
288 Htable[1] = V;
289 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
290 V=Htable[4];
291 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
292 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
293 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
294 V=Htable[8];
295 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
296 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
297 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
298 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
299 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
300 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
301 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
302#endif
303#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
304 /*
305 * ARM assembler expects specific dword order in Htable.
306 */
307 {
308 int j;
309
310 if (BYTE_ORDER == LITTLE_ENDIAN)
311 for (j=0;j<16;++j) {
312 V = Htable[j];
313 Htable[j].hi = V.lo;
314 Htable[j].lo = V.hi;
315 }
316 else
317 for (j=0;j<16;++j) {
318 V = Htable[j];
319 Htable[j].hi = V.lo<<32|V.lo>>32;
320 Htable[j].lo = V.hi<<32|V.hi>>32;
321 }
322 }
323#endif
324}
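/*
 * [Editorial note, not part of the original file] As built above,
 * Htable[8] = H, Htable[4] = H*x, Htable[2] = H*x^2, Htable[1] = H*x^3,
 * and every composite index is the XOR of the corresponding power-of-two
 * entries, so Htable[n] holds n*H with the nibble interpreted in GHASH's
 * reflected bit order.  gcm_gmult_4bit below consumes Xi one nibble at a
 * time using this table (Shoup's 4-bit method).
 */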
325
326#ifndef GHASH_ASM
327static const size_t rem_4bit[16] = {
328 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
329 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
330 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
331 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
332
333static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
334{
335 u128 Z;
336 int cnt = 15;
337 size_t rem, nlo, nhi;
338
339 nlo = ((const u8 *)Xi)[15];
340 nhi = nlo>>4;
341 nlo &= 0xf;
342
343 Z.hi = Htable[nlo].hi;
344 Z.lo = Htable[nlo].lo;
345
346 while (1) {
347 rem = (size_t)Z.lo&0xf;
348 Z.lo = (Z.hi<<60)|(Z.lo>>4);
349 Z.hi = (Z.hi>>4);
350 if (sizeof(size_t)==8)
351 Z.hi ^= rem_4bit[rem];
352 else
353 Z.hi ^= (u64)rem_4bit[rem]<<32;
354
355 Z.hi ^= Htable[nhi].hi;
356 Z.lo ^= Htable[nhi].lo;
357
358 if (--cnt<0) break;
359
360 nlo = ((const u8 *)Xi)[cnt];
361 nhi = nlo>>4;
362 nlo &= 0xf;
363
364 rem = (size_t)Z.lo&0xf;
365 Z.lo = (Z.hi<<60)|(Z.lo>>4);
366 Z.hi = (Z.hi>>4);
367 if (sizeof(size_t)==8)
368 Z.hi ^= rem_4bit[rem];
369 else
370 Z.hi ^= (u64)rem_4bit[rem]<<32;
371
372 Z.hi ^= Htable[nlo].hi;
373 Z.lo ^= Htable[nlo].lo;
374 }
375
376 if (BYTE_ORDER == LITTLE_ENDIAN) {
377#ifdef BSWAP8
378 Xi[0] = BSWAP8(Z.hi);
379 Xi[1] = BSWAP8(Z.lo);
380#else
381 u8 *p = (u8 *)Xi;
382 u32 v;
383 v = (u32)(Z.hi>>32); PUTU32(p,v);
384 v = (u32)(Z.hi); PUTU32(p+4,v);
385 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
386 v = (u32)(Z.lo); PUTU32(p+12,v);
387#endif
388 }
389 else {
390 Xi[0] = Z.hi;
391 Xi[1] = Z.lo;
392 }
393}
394
395#if !defined(OPENSSL_SMALL_FOOTPRINT)
396/*
397 * Streamed variant of gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt
398 * for details... Compiler-generated code doesn't seem to give any
399 * performance improvement, at least not on x86[_64]. It's here
400 * mostly as reference and a placeholder for possible future
401 * non-trivial optimization[s]...
402 */
403static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
404 const u8 *inp,size_t len)
405{
406 u128 Z;
407 int cnt;
408 size_t rem, nlo, nhi;
409
410#if 1
411 do {
412 cnt = 15;
413 nlo = ((const u8 *)Xi)[15];
414 nlo ^= inp[15];
415 nhi = nlo>>4;
416 nlo &= 0xf;
417
418 Z.hi = Htable[nlo].hi;
419 Z.lo = Htable[nlo].lo;
420
421 while (1) {
422 rem = (size_t)Z.lo&0xf;
423 Z.lo = (Z.hi<<60)|(Z.lo>>4);
424 Z.hi = (Z.hi>>4);
425 if (sizeof(size_t)==8)
426 Z.hi ^= rem_4bit[rem];
427 else
428 Z.hi ^= (u64)rem_4bit[rem]<<32;
429
430 Z.hi ^= Htable[nhi].hi;
431 Z.lo ^= Htable[nhi].lo;
432
433 if (--cnt<0) break;
434
435 nlo = ((const u8 *)Xi)[cnt];
436 nlo ^= inp[cnt];
437 nhi = nlo>>4;
438 nlo &= 0xf;
439
440 rem = (size_t)Z.lo&0xf;
441 Z.lo = (Z.hi<<60)|(Z.lo>>4);
442 Z.hi = (Z.hi>>4);
443 if (sizeof(size_t)==8)
444 Z.hi ^= rem_4bit[rem];
445 else
446 Z.hi ^= (u64)rem_4bit[rem]<<32;
447
448 Z.hi ^= Htable[nlo].hi;
449 Z.lo ^= Htable[nlo].lo;
450 }
451#else
452 /*
453 * An extra 256+16 bytes per key plus 512 bytes of shared tables
454 * [should] give ~50% improvement... One could have PACK()-ed
455 * the rem_8bit even here, but the priority is to minimize
456 * cache footprint...
457 */
458 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
459 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
460 static const unsigned short rem_8bit[256] = {
461 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
462 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
463 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
464 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
465 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
466 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
467 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
468 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
469 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
470 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
471 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
472 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
473 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
474 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
475 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
476 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
477 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
478 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
479 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
480 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
481 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
482 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
483 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
484 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
485 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
486 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
487 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
488 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
489 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
490 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
491 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
492 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
493 /*
494 * This pre-processing phase slows down the procedure by approximately
495 * the same time as it makes each loop spin faster. In other words
496 * single-block performance is approximately the same as the straightforward
497 * "4-bit" implementation, and beyond that it only gets faster...
498 */
499 for (cnt=0; cnt<16; ++cnt) {
500 Z.hi = Htable[cnt].hi;
501 Z.lo = Htable[cnt].lo;
502 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
503 Hshr4[cnt].hi = (Z.hi>>4);
504 Hshl4[cnt] = (u8)(Z.lo<<4);
505 }
506
507 do {
508 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
509 nlo = ((const u8 *)Xi)[cnt];
510 nlo ^= inp[cnt];
511 nhi = nlo>>4;
512 nlo &= 0xf;
513
514 Z.hi ^= Htable[nlo].hi;
515 Z.lo ^= Htable[nlo].lo;
516
517 rem = (size_t)Z.lo&0xff;
518
519 Z.lo = (Z.hi<<56)|(Z.lo>>8);
520 Z.hi = (Z.hi>>8);
521
522 Z.hi ^= Hshr4[nhi].hi;
523 Z.lo ^= Hshr4[nhi].lo;
524 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
525 }
526
527 nlo = ((const u8 *)Xi)[0];
528 nlo ^= inp[0];
529 nhi = nlo>>4;
530 nlo &= 0xf;
531
532 Z.hi ^= Htable[nlo].hi;
533 Z.lo ^= Htable[nlo].lo;
534
535 rem = (size_t)Z.lo&0xf;
536
537 Z.lo = (Z.hi<<60)|(Z.lo>>4);
538 Z.hi = (Z.hi>>4);
539
540 Z.hi ^= Htable[nhi].hi;
541 Z.lo ^= Htable[nhi].lo;
542 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
543#endif
544
545 if (BYTE_ORDER == LITTLE_ENDIAN) {
546#ifdef BSWAP8
547 Xi[0] = BSWAP8(Z.hi);
548 Xi[1] = BSWAP8(Z.lo);
549#else
550 u8 *p = (u8 *)Xi;
551 u32 v;
552 v = (u32)(Z.hi>>32); PUTU32(p,v);
553 v = (u32)(Z.hi); PUTU32(p+4,v);
554 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
555 v = (u32)(Z.lo); PUTU32(p+12,v);
556#endif
557 }
558 else {
559 Xi[0] = Z.hi;
560 Xi[1] = Z.lo;
561 }
562 } while (inp+=16, len-=16);
563}
564#endif
565#else
566void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
567void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
568#endif
569
570#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
571#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
572#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
573/* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
574 * thrashing effect. In other words the idea is to hash data while it's
575 * still in the L1 cache after the encryption pass... */
576#define GHASH_CHUNK (3*1024)
577#endif
578
579#else /* TABLE_BITS */
580
581static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
582{
583 u128 V,Z = { 0,0 };
584 long X;
585 int i,j;
586 const long *xi = (const long *)Xi;
587
588 V.hi = H[0]; /* H is in host byte order, no byte swapping */
589 V.lo = H[1];
590
591 for (j=0; j<16/sizeof(long); ++j) {
592 if (BYTE_ORDER == LITTLE_ENDIAN) {
593 if (sizeof(long)==8) {
594#ifdef BSWAP8
595 X = (long)(BSWAP8(xi[j]));
596#else
597 const u8 *p = (const u8 *)(xi+j);
598 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
599#endif
600 }
601 else {
602 const u8 *p = (const u8 *)(xi+j);
603 X = (long)GETU32(p);
604 }
605 }
606 else
607 X = xi[j];
608
609 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
610 u64 M = (u64)(X>>(8*sizeof(long)-1));
611 Z.hi ^= V.hi&M;
612 Z.lo ^= V.lo&M;
613
614 REDUCE1BIT(V);
615 }
616 }
617
618 if (BYTE_ORDER == LITTLE_ENDIAN) {
619#ifdef BSWAP8
620 Xi[0] = BSWAP8(Z.hi);
621 Xi[1] = BSWAP8(Z.lo);
622#else
623 u8 *p = (u8 *)Xi;
624 u32 v;
625 v = (u32)(Z.hi>>32); PUTU32(p,v);
626 v = (u32)(Z.hi); PUTU32(p+4,v);
627 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
628 v = (u32)(Z.lo); PUTU32(p+12,v);
629#endif
630 }
631 else {
632 Xi[0] = Z.hi;
633 Xi[1] = Z.lo;
634 }
635}
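/*
 * [Editorial note, not part of the original file] This is the
 * straightforward bit-serial multiply: for each of the 128 bits of Xi,
 * taken most-significant-first, the mask M selects whether the current
 * V (initially H) is XORed into the accumulator Z, and REDUCE1BIT then
 * advances V to V*x.  No tables are used, which is why TABLE_BITS==1 is
 * described above as inappropriate for performance reasons.
 */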
636#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
637
638#endif
639
640#if TABLE_BITS==4 && defined(GHASH_ASM)
641# if !defined(I386_ONLY) && \
642 (defined(__i386) || defined(__i386__) || \
643 defined(__x86_64) || defined(__x86_64__) || \
644 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
645# define GHASH_ASM_X86_OR_64
646# define GCM_FUNCREF_4BIT
647extern unsigned int OPENSSL_ia32cap_P[2];
648
649void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
650void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
651void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
652
653# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
654# define GHASH_ASM_X86
655void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
656void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
657
658void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
659void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
660# endif
661# elif defined(__arm__) || defined(__arm)
662# include "arm_arch.h"
663# if __ARM_ARCH__>=7
664# define GHASH_ASM_ARM
665# define GCM_FUNCREF_4BIT
666void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
667void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
668# endif
669# endif
670#endif
671
672#ifdef GCM_FUNCREF_4BIT
673# undef GCM_MUL
674# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
675# ifdef GHASH
676# undef GHASH
677# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
678# endif
679#endif
680
681void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
682{
683 memset(ctx,0,sizeof(*ctx));
684 ctx->block = block;
685 ctx->key = key;
686
687 (*block)(ctx->H.c,ctx->H.c,key);
688
689 if (BYTE_ORDER == LITTLE_ENDIAN) {
690 /* H is stored in host byte order */
691#ifdef BSWAP8
692 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
693 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
694#else
695 u8 *p = ctx->H.c;
696 u64 hi,lo;
697 hi = (u64)GETU32(p) <<32|GETU32(p+4);
698 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
699 ctx->H.u[0] = hi;
700 ctx->H.u[1] = lo;
701#endif
702 }
703
704#if TABLE_BITS==8
705 gcm_init_8bit(ctx->Htable,ctx->H.u);
706#elif TABLE_BITS==4
707# if defined(GHASH_ASM_X86_OR_64)
708# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
709 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
710 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
711 gcm_init_clmul(ctx->Htable,ctx->H.u);
712 ctx->gmult = gcm_gmult_clmul;
713 ctx->ghash = gcm_ghash_clmul;
714 return;
715 }
716# endif
717 gcm_init_4bit(ctx->Htable,ctx->H.u);
718# if defined(GHASH_ASM_X86) /* x86 only */
719# if defined(OPENSSL_IA32_SSE2)
720 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
721# else
722 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
723# endif
724 ctx->gmult = gcm_gmult_4bit_mmx;
725 ctx->ghash = gcm_ghash_4bit_mmx;
726 } else {
727 ctx->gmult = gcm_gmult_4bit_x86;
728 ctx->ghash = gcm_ghash_4bit_x86;
729 }
730# else
731 ctx->gmult = gcm_gmult_4bit;
732 ctx->ghash = gcm_ghash_4bit;
733# endif
734# elif defined(GHASH_ASM_ARM)
735 if (OPENSSL_armcap_P & ARMV7_NEON) {
736 ctx->gmult = gcm_gmult_neon;
737 ctx->ghash = gcm_ghash_neon;
738 } else {
739 gcm_init_4bit(ctx->Htable,ctx->H.u);
740 ctx->gmult = gcm_gmult_4bit;
741 ctx->ghash = gcm_ghash_4bit;
742 }
743# else
744 gcm_init_4bit(ctx->Htable,ctx->H.u);
745# endif
746#endif
747}
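/*
 * [Editorial note, not part of the original file] Since ctx is zeroed by
 * the memset above, the (*block)() call encrypts the all-zero block, so
 * ctx->H holds the GCM hash key H = E_K(0^128), byte-swapped into host
 * order on little-endian machines before the multiplication tables (or the
 * CLMUL/NEON code paths) are set up from it.
 */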
748
749void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
750{
751 unsigned int ctr;
752#ifdef GCM_FUNCREF_4BIT
753 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
754#endif
755
756 ctx->Yi.u[0] = 0;
757 ctx->Yi.u[1] = 0;
758 ctx->Xi.u[0] = 0;
759 ctx->Xi.u[1] = 0;
760 ctx->len.u[0] = 0; /* AAD length */
761 ctx->len.u[1] = 0; /* message length */
762 ctx->ares = 0;
763 ctx->mres = 0;
764
765 if (len==12) {
766 memcpy(ctx->Yi.c,iv,12);
767 ctx->Yi.c[15]=1;
768 ctr=1;
769 }
770 else {
771 size_t i;
772 u64 len0 = len;
773
774 while (len>=16) {
775 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
776 GCM_MUL(ctx,Yi);
777 iv += 16;
778 len -= 16;
779 }
780 if (len) {
781 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
782 GCM_MUL(ctx,Yi);
783 }
784 len0 <<= 3;
785 if (BYTE_ORDER == LITTLE_ENDIAN) {
786#ifdef BSWAP8
787 ctx->Yi.u[1] ^= BSWAP8(len0);
788#else
789 ctx->Yi.c[8] ^= (u8)(len0>>56);
790 ctx->Yi.c[9] ^= (u8)(len0>>48);
791 ctx->Yi.c[10] ^= (u8)(len0>>40);
792 ctx->Yi.c[11] ^= (u8)(len0>>32);
793 ctx->Yi.c[12] ^= (u8)(len0>>24);
794 ctx->Yi.c[13] ^= (u8)(len0>>16);
795 ctx->Yi.c[14] ^= (u8)(len0>>8);
796 ctx->Yi.c[15] ^= (u8)(len0);
797#endif
798 }
799 else
800 ctx->Yi.u[1] ^= len0;
801
802 GCM_MUL(ctx,Yi);
803
804 if (BYTE_ORDER == LITTLE_ENDIAN)
805#ifdef BSWAP4
806 ctr = BSWAP4(ctx->Yi.d[3]);
807#else
808 ctr = GETU32(ctx->Yi.c+12);
809#endif
810 else
811 ctr = ctx->Yi.d[3];
812 }
813
814 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
815 ++ctr;
816 if (BYTE_ORDER == LITTLE_ENDIAN)
817#ifdef BSWAP4
818 ctx->Yi.d[3] = BSWAP4(ctr);
819#else
820 PUTU32(ctx->Yi.c+12,ctr);
821#endif
822 else
823 ctx->Yi.d[3] = ctr;
824}
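/*
 * [Editorial note, not part of the original file] This mirrors the two
 * pre-counter-block cases of NIST SP 800-38D:
 *
 *   len(IV) == 96 bits:  Y0 = IV || 0^31 || 1                (fast path)
 *   otherwise:           Y0 = GHASH_H(IV padded to a block boundary
 *                                     || 0^64 || [len(IV) in bits]_64)
 *
 * EK0 = E_K(Y0) is computed here and kept for the final tag, and the
 * 32-bit counter in Yi[12..15] is then incremented so the first data block
 * uses Y0+1.
 */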
825
826int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
827{
828 size_t i;
829 unsigned int n;
830 u64 alen = ctx->len.u[0];
831#ifdef GCM_FUNCREF_4BIT
832 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
833# ifdef GHASH
834 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
835 const u8 *inp,size_t len) = ctx->ghash;
836# endif
837#endif
838
839 if (ctx->len.u[1]) return -2;
840
841 alen += len;
842 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
843 return -1;
844 ctx->len.u[0] = alen;
845
846 n = ctx->ares;
847 if (n) {
848 while (n && len) {
849 ctx->Xi.c[n] ^= *(aad++);
850 --len;
851 n = (n+1)%16;
852 }
853 if (n==0) GCM_MUL(ctx,Xi);
854 else {
855 ctx->ares = n;
856 return 0;
857 }
858 }
859
860#ifdef GHASH
861 if ((i = (len&(size_t)-16))) {
862 GHASH(ctx,aad,i);
863 aad += i;
864 len -= i;
865 }
866#else
867 while (len>=16) {
868 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
869 GCM_MUL(ctx,Xi);
870 aad += 16;
871 len -= 16;
872 }
873#endif
874 if (len) {
875 n = (unsigned int)len;
876 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
877 }
878
879 ctx->ares = n;
880 return 0;
881}
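/*
 * [Editorial note, not part of the original file] AAD has to be supplied
 * before any payload: the ctx->len.u[1] check above returns -2 once
 * encryption/decryption has started.  The 2^61-byte limit matches the
 * spec's 2^64-bit bound on AAD (2^61 bytes * 8 = 2^64 bits), and ctx->ares
 * records the length of a partial final block so that GHASH(AAD) can be
 * completed lazily by the first encrypt/decrypt call.
 */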
882
883int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
884 const unsigned char *in, unsigned char *out,
885 size_t len)
886{
887 unsigned int n, ctr;
888 size_t i;
889 u64 mlen = ctx->len.u[1];
890 block128_f block = ctx->block;
891 void *key = ctx->key;
892#ifdef GCM_FUNCREF_4BIT
893 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
894# ifdef GHASH
895 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
896 const u8 *inp,size_t len) = ctx->ghash;
897# endif
898#endif
899
900 mlen += len;
901 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
902 return -1;
903 ctx->len.u[1] = mlen;
904
905 if (ctx->ares) {
906 /* First call to encrypt finalizes GHASH(AAD) */
907 GCM_MUL(ctx,Xi);
908 ctx->ares = 0;
909 }
910
911 if (BYTE_ORDER == LITTLE_ENDIAN)
912#ifdef BSWAP4
913 ctr = BSWAP4(ctx->Yi.d[3]);
914#else
915 ctr = GETU32(ctx->Yi.c+12);
916#endif
917 else
918 ctr = ctx->Yi.d[3];
919
920 n = ctx->mres;
921#if !defined(OPENSSL_SMALL_FOOTPRINT)
922 if (16%sizeof(size_t) == 0) do { /* always true actually */
923 if (n) {
924 while (n && len) {
925 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
926 --len;
927 n = (n+1)%16;
928 }
929 if (n==0) GCM_MUL(ctx,Xi);
930 else {
931 ctx->mres = n;
932 return 0;
933 }
934 }
935#ifdef __STRICT_ALIGNMENT
936 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
937 break;
938#endif
939#if defined(GHASH) && defined(GHASH_CHUNK)
940 while (len>=GHASH_CHUNK) {
941 size_t j=GHASH_CHUNK;
942
943 while (j) {
944 size_t *out_t=(size_t *)out;
945 const size_t *in_t=(const size_t *)in;
946
947 (*block)(ctx->Yi.c,ctx->EKi.c,key);
948 ++ctr;
949 if (BYTE_ORDER == LITTLE_ENDIAN)
950#ifdef BSWAP4
951 ctx->Yi.d[3] = BSWAP4(ctr);
952#else
953 PUTU32(ctx->Yi.c+12,ctr);
954#endif
955 else
956 ctx->Yi.d[3] = ctr;
957 for (i=0; i<16/sizeof(size_t); ++i)
958 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
959 out += 16;
960 in += 16;
961 j -= 16;
962 }
963 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
964 len -= GHASH_CHUNK;
965 }
966 if ((i = (len&(size_t)-16))) {
967 size_t j=i;
968
969 while (len>=16) {
970 size_t *out_t=(size_t *)out;
971 const size_t *in_t=(const size_t *)in;
972
973 (*block)(ctx->Yi.c,ctx->EKi.c,key);
974 ++ctr;
975 if (BYTE_ORDER == LITTLE_ENDIAN)
976#ifdef BSWAP4
977 ctx->Yi.d[3] = BSWAP4(ctr);
978#else
979 PUTU32(ctx->Yi.c+12,ctr);
980#endif
981 else
982 ctx->Yi.d[3] = ctr;
983 for (i=0; i<16/sizeof(size_t); ++i)
984 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
985 out += 16;
986 in += 16;
987 len -= 16;
988 }
989 GHASH(ctx,out-j,j);
990 }
991#else
992 while (len>=16) {
993 size_t *out_t=(size_t *)out;
994 const size_t *in_t=(const size_t *)in;
995
996 (*block)(ctx->Yi.c,ctx->EKi.c,key);
997 ++ctr;
998 if (BYTE_ORDER == LITTLE_ENDIAN)
999#ifdef BSWAP4
1000 ctx->Yi.d[3] = BSWAP4(ctr);
1001#else
1002 PUTU32(ctx->Yi.c+12,ctr);
1003#endif
1004 else
1005 ctx->Yi.d[3] = ctr;
1006 for (i=0; i<16/sizeof(size_t); ++i)
1007 ctx->Xi.t[i] ^=
1008 out_t[i] = in_t[i]^ctx->EKi.t[i];
1009 GCM_MUL(ctx,Xi);
1010 out += 16;
1011 in += 16;
1012 len -= 16;
1013 }
1014#endif
1015 if (len) {
1016 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1017 ++ctr;
1018 if (BYTE_ORDER == LITTLE_ENDIAN)
1019#ifdef BSWAP4
1020 ctx->Yi.d[3] = BSWAP4(ctr);
1021#else
1022 PUTU32(ctx->Yi.c+12,ctr);
1023#endif
1024 else
1025 ctx->Yi.d[3] = ctr;
1026 while (len--) {
1027 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1028 ++n;
1029 }
1030 }
1031
1032 ctx->mres = n;
1033 return 0;
1034 } while(0);
1035#endif
1036 for (i=0;i<len;++i) {
1037 if (n==0) {
1038 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1039 ++ctr;
1040 if (BYTE_ORDER == LITTLE_ENDIAN)
1041#ifdef BSWAP4
1042 ctx->Yi.d[3] = BSWAP4(ctr);
1043#else
1044 PUTU32(ctx->Yi.c+12,ctr);
1045#endif
1046 else
1047 ctx->Yi.d[3] = ctr;
1048 }
1049 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1050 n = (n+1)%16;
1051 if (n==0)
1052 GCM_MUL(ctx,Xi);
1053 }
1054
1055 ctx->mres = n;
1056 return 0;
1057}
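/*
 * [Editorial note, not part of the original file] The mlen check above
 * enforces SP 800-38D's maximum plaintext length of 2^39 - 256 bits,
 * i.e. (2^39 - 256)/8 = 2^36 - 32 bytes, which also keeps the 32-bit block
 * counter in Yi from cycling back to the Y0 block reserved for the tag.
 */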
1058
1059int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1060 const unsigned char *in, unsigned char *out,
1061 size_t len)
1062{
1063 unsigned int n, ctr;
1064 size_t i;
1065 u64 mlen = ctx->len.u[1];
1066 block128_f block = ctx->block;
1067 void *key = ctx->key;
1068#ifdef GCM_FUNCREF_4BIT
1069 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1070# ifdef GHASH
1071 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1072 const u8 *inp,size_t len) = ctx->ghash;
1073# endif
1074#endif
1075
1076 mlen += len;
1077 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1078 return -1;
1079 ctx->len.u[1] = mlen;
1080
1081 if (ctx->ares) {
1082 /* First call to decrypt finalizes GHASH(AAD) */
1083 GCM_MUL(ctx,Xi);
1084 ctx->ares = 0;
1085 }
1086
1087 if (BYTE_ORDER == LITTLE_ENDIAN)
1088#ifdef BSWAP4
1089 ctr = BSWAP4(ctx->Yi.d[3]);
1090#else
1091 ctr = GETU32(ctx->Yi.c+12);
1092#endif
1093 else
1094 ctr = ctx->Yi.d[3];
1095
1096 n = ctx->mres;
1097#if !defined(OPENSSL_SMALL_FOOTPRINT)
1098 if (16%sizeof(size_t) == 0) do { /* always true actually */
1099 if (n) {
1100 while (n && len) {
1101 u8 c = *(in++);
1102 *(out++) = c^ctx->EKi.c[n];
1103 ctx->Xi.c[n] ^= c;
1104 --len;
1105 n = (n+1)%16;
1106 }
1107 if (n==0) GCM_MUL (ctx,Xi);
1108 else {
1109 ctx->mres = n;
1110 return 0;
1111 }
1112 }
1113#ifdef __STRICT_ALIGNMENT
1114 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1115 break;
1116#endif
1117#if defined(GHASH) && defined(GHASH_CHUNK)
1118 while (len>=GHASH_CHUNK) {
1119 size_t j=GHASH_CHUNK;
1120
1121 GHASH(ctx,in,GHASH_CHUNK);
1122 while (j) {
1123 size_t *out_t=(size_t *)out;
1124 const size_t *in_t=(const size_t *)in;
1125
1126 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1127 ++ctr;
1128 if (BYTE_ORDER == LITTLE_ENDIAN)
1129#ifdef BSWAP4
1130 ctx->Yi.d[3] = BSWAP4(ctr);
1131#else
1132 PUTU32(ctx->Yi.c+12,ctr);
1133#endif
1134 else
1135 ctx->Yi.d[3] = ctr;
1136 for (i=0; i<16/sizeof(size_t); ++i)
1137 out_t[i] = in_t[i]^ctx->EKi.t[i];
1138 out += 16;
1139 in += 16;
1140 j -= 16;
1141 }
1142 len -= GHASH_CHUNK;
1143 }
1144 if ((i = (len&(size_t)-16))) {
1145 GHASH(ctx,in,i);
1146 while (len>=16) {
1147 size_t *out_t=(size_t *)out;
1148 const size_t *in_t=(const size_t *)in;
1149
1150 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1151 ++ctr;
1152 if (BYTE_ORDER == LITTLE_ENDIAN)
1153#ifdef BSWAP4
1154 ctx->Yi.d[3] = BSWAP4(ctr);
1155#else
1156 PUTU32(ctx->Yi.c+12,ctr);
1157#endif
1158 else
1159 ctx->Yi.d[3] = ctr;
1160 for (i=0; i<16/sizeof(size_t); ++i)
1161 out_t[i] = in_t[i]^ctx->EKi.t[i];
1162 out += 16;
1163 in += 16;
1164 len -= 16;
1165 }
1166 }
1167#else
1168 while (len>=16) {
1169 size_t *out_t=(size_t *)out;
1170 const size_t *in_t=(const size_t *)in;
1171
1172 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1173 ++ctr;
1174 if (BYTE_ORDER == LITTLE_ENDIAN)
1175#ifdef BSWAP4
1176 ctx->Yi.d[3] = BSWAP4(ctr);
1177#else
1178 PUTU32(ctx->Yi.c+12,ctr);
1179#endif
1180 else
1181 ctx->Yi.d[3] = ctr;
1182 for (i=0; i<16/sizeof(size_t); ++i) {
1183 size_t c = in[i];
1184 out[i] = c^ctx->EKi.t[i];
1185 ctx->Xi.t[i] ^= c;
1186 }
1187 GCM_MUL(ctx,Xi);
1188 out += 16;
1189 in += 16;
1190 len -= 16;
1191 }
1192#endif
1193 if (len) {
1194 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1195 ++ctr;
1196 if (BYTE_ORDER == LITTLE_ENDIAN)
1197#ifdef BSWAP4
1198 ctx->Yi.d[3] = BSWAP4(ctr);
1199#else
1200 PUTU32(ctx->Yi.c+12,ctr);
1201#endif
1202 else
1203 ctx->Yi.d[3] = ctr;
1204 while (len--) {
1205 u8 c = in[n];
1206 ctx->Xi.c[n] ^= c;
1207 out[n] = c^ctx->EKi.c[n];
1208 ++n;
1209 }
1210 }
1211
1212 ctx->mres = n;
1213 return 0;
1214 } while(0);
1215#endif
1216 for (i=0;i<len;++i) {
1217 u8 c;
1218 if (n==0) {
1219 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1220 ++ctr;
1221 if (BYTE_ORDER == LITTLE_ENDIAN)
1222#ifdef BSWAP4
1223 ctx->Yi.d[3] = BSWAP4(ctr);
1224#else
1225 PUTU32(ctx->Yi.c+12,ctr);
1226#endif
1227 else
1228 ctx->Yi.d[3] = ctr;
1229 }
1230 c = in[i];
1231 out[i] = c^ctx->EKi.c[n];
1232 ctx->Xi.c[n] ^= c;
1233 n = (n+1)%16;
1234 if (n==0)
1235 GCM_MUL(ctx,Xi);
1236 }
1237
1238 ctx->mres = n;
1239 return 0;
1240}
1241
1242int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1243 const unsigned char *in, unsigned char *out,
1244 size_t len, ctr128_f stream)
1245{
1246 unsigned int n, ctr;
1247 size_t i;
1248 u64 mlen = ctx->len.u[1];
1249 void *key = ctx->key;
1250#ifdef GCM_FUNCREF_4BIT
1251 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1252# ifdef GHASH
1253 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1254 const u8 *inp,size_t len) = ctx->ghash;
1255# endif
1256#endif
1257
1258 mlen += len;
1259 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1260 return -1;
1261 ctx->len.u[1] = mlen;
1262
1263 if (ctx->ares) {
1264 /* First call to encrypt finalizes GHASH(AAD) */
1265 GCM_MUL(ctx,Xi);
1266 ctx->ares = 0;
1267 }
1268
1269 if (BYTE_ORDER == LITTLE_ENDIAN)
1270#ifdef BSWAP4
1271 ctr = BSWAP4(ctx->Yi.d[3]);
1272#else
1273 ctr = GETU32(ctx->Yi.c+12);
1274#endif
1275 else
1276 ctr = ctx->Yi.d[3];
1277
1278 n = ctx->mres;
1279 if (n) {
1280 while (n && len) {
1281 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1282 --len;
1283 n = (n+1)%16;
1284 }
1285 if (n==0) GCM_MUL(ctx,Xi);
1286 else {
1287 ctx->mres = n;
1288 return 0;
1289 }
1290 }
1291#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1292 while (len>=GHASH_CHUNK) {
1293 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1294 ctr += GHASH_CHUNK/16;
1295 if (BYTE_ORDER == LITTLE_ENDIAN)
1296#ifdef BSWAP4
1297 ctx->Yi.d[3] = BSWAP4(ctr);
1298#else
1299 PUTU32(ctx->Yi.c+12,ctr);
1300#endif
1301 else
1302 ctx->Yi.d[3] = ctr;
1303 GHASH(ctx,out,GHASH_CHUNK);
1304 out += GHASH_CHUNK;
1305 in += GHASH_CHUNK;
1306 len -= GHASH_CHUNK;
1307 }
1308#endif
1309 if ((i = (len&(size_t)-16))) {
1310 size_t j=i/16;
1311
1312 (*stream)(in,out,j,key,ctx->Yi.c);
1313 ctr += (unsigned int)j;
1314 if (BYTE_ORDER == LITTLE_ENDIAN)
1315#ifdef BSWAP4
1316 ctx->Yi.d[3] = BSWAP4(ctr);
1317#else
1318 PUTU32(ctx->Yi.c+12,ctr);
1319#endif
1320 else
1321 ctx->Yi.d[3] = ctr;
1322 in += i;
1323 len -= i;
1324#if defined(GHASH)
1325 GHASH(ctx,out,i);
1326 out += i;
1327#else
1328 while (j--) {
1329 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1330 GCM_MUL(ctx,Xi);
1331 out += 16;
1332 }
1333#endif
1334 }
1335 if (len) {
1336 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1337 ++ctr;
1338 if (BYTE_ORDER == LITTLE_ENDIAN)
1339#ifdef BSWAP4
1340 ctx->Yi.d[3] = BSWAP4(ctr);
1341#else
1342 PUTU32(ctx->Yi.c+12,ctr);
1343#endif
1344 else
1345 ctx->Yi.d[3] = ctr;
1346 while (len--) {
1347 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1348 ++n;
1349 }
1350 }
1351
1352 ctx->mres = n;
1353 return 0;
1354}
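/*
 * [Editorial note, not part of the original file] The ctr128_f "stream"
 * argument is expected to behave like the typedef in modes.h, roughly:
 *
 *   void (*ctr128_f)(const unsigned char *in, unsigned char *out,
 *                    size_t blocks, const void *key,
 *                    const unsigned char ivec[16]);
 *
 * i.e. it encrypts `blocks` consecutive 16-byte blocks in CTR mode starting
 * from the counter block in ivec; this function then bumps the 32-bit
 * counter in Yi itself and GHASHes the resulting ciphertext.
 */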
1355
1356int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1357 const unsigned char *in, unsigned char *out,
1358 size_t len,ctr128_f stream)
1359{
1360 unsigned int n, ctr;
1361 size_t i;
1362 u64 mlen = ctx->len.u[1];
1363 void *key = ctx->key;
1364#ifdef GCM_FUNCREF_4BIT
1365 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1366# ifdef GHASH
1367 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1368 const u8 *inp,size_t len) = ctx->ghash;
1369# endif
1370#endif
1371
1372 mlen += len;
1373 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1374 return -1;
1375 ctx->len.u[1] = mlen;
1376
1377 if (ctx->ares) {
1378 /* First call to decrypt finalizes GHASH(AAD) */
1379 GCM_MUL(ctx,Xi);
1380 ctx->ares = 0;
1381 }
1382
1383 if (BYTE_ORDER == LITTLE_ENDIAN)
1384#ifdef BSWAP4
1385 ctr = BSWAP4(ctx->Yi.d[3]);
1386#else
1387 ctr = GETU32(ctx->Yi.c+12);
1388#endif
1389 else
1390 ctr = ctx->Yi.d[3];
1391
1392 n = ctx->mres;
1393 if (n) {
1394 while (n && len) {
1395 u8 c = *(in++);
1396 *(out++) = c^ctx->EKi.c[n];
1397 ctx->Xi.c[n] ^= c;
1398 --len;
1399 n = (n+1)%16;
1400 }
1401 if (n==0) GCM_MUL (ctx,Xi);
1402 else {
1403 ctx->mres = n;
1404 return 0;
1405 }
1406 }
1407#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1408 while (len>=GHASH_CHUNK) {
1409 GHASH(ctx,in,GHASH_CHUNK);
1410 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1411 ctr += GHASH_CHUNK/16;
1412 if (BYTE_ORDER == LITTLE_ENDIAN)
1413#ifdef BSWAP4
1414 ctx->Yi.d[3] = BSWAP4(ctr);
1415#else
1416 PUTU32(ctx->Yi.c+12,ctr);
1417#endif
1418 else
1419 ctx->Yi.d[3] = ctr;
1420 out += GHASH_CHUNK;
1421 in += GHASH_CHUNK;
1422 len -= GHASH_CHUNK;
1423 }
1424#endif
1425 if ((i = (len&(size_t)-16))) {
1426 size_t j=i/16;
1427
1428#if defined(GHASH)
1429 GHASH(ctx,in,i);
1430#else
1431 while (j--) {
1432 size_t k;
1433 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1434 GCM_MUL(ctx,Xi);
1435 in += 16;
1436 }
1437 j = i/16;
1438 in -= i;
1439#endif
1440 (*stream)(in,out,j,key,ctx->Yi.c);
1441 ctr += (unsigned int)j;
1442 if (BYTE_ORDER == LITTLE_ENDIAN)
1443#ifdef BSWAP4
1444 ctx->Yi.d[3] = BSWAP4(ctr);
1445#else
1446 PUTU32(ctx->Yi.c+12,ctr);
1447#endif
1448 else
1449 ctx->Yi.d[3] = ctr;
1450 out += i;
1451 in += i;
1452 len -= i;
1453 }
1454 if (len) {
1455 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1456 ++ctr;
1457 if (BYTE_ORDER == LITTLE_ENDIAN)
1458#ifdef BSWAP4
1459 ctx->Yi.d[3] = BSWAP4(ctr);
1460#else
1461 PUTU32(ctx->Yi.c+12,ctr);
1462#endif
1463 else
1464 ctx->Yi.d[3] = ctr;
1465 while (len--) {
1466 u8 c = in[n];
1467 ctx->Xi.c[n] ^= c;
1468 out[n] = c^ctx->EKi.c[n];
1469 ++n;
1470 }
1471 }
1472
1473 ctx->mres = n;
1474 return 0;
1475}
1476
1477int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1478 size_t len)
1479{
1480 u64 alen = ctx->len.u[0]<<3;
1481 u64 clen = ctx->len.u[1]<<3;
1482#ifdef GCM_FUNCREF_4BIT
1483 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1484#endif
1485
1486 if (ctx->mres || ctx->ares)
1487 GCM_MUL(ctx,Xi);
1488
1489 if (BYTE_ORDER == LITTLE_ENDIAN) {
1490#ifdef BSWAP8
1491 alen = BSWAP8(alen);
1492 clen = BSWAP8(clen);
1493#else
1494 u8 *p = ctx->len.c;
1495
1496 ctx->len.u[0] = alen;
1497 ctx->len.u[1] = clen;
1498
1499 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1500 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1501#endif
1502 }
1503
1504 ctx->Xi.u[0] ^= alen;
1505 ctx->Xi.u[1] ^= clen;
1506 GCM_MUL(ctx,Xi);
1507
1508 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1509 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1510
1511 if (tag && len<=sizeof(ctx->Xi))
1512 return memcmp(ctx->Xi.c,tag,len);
1513 else
1514 return -1;
1515}
1516
1517void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1518{
1519 CRYPTO_gcm128_finish(ctx, NULL, 0);
1520 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1521}
1522
1523GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1524{
1525 GCM128_CONTEXT *ret;
1526
1527 if ((ret = malloc(sizeof(GCM128_CONTEXT))))
1528 CRYPTO_gcm128_init(ret,key,block);
1529
1530 return ret;
1531}
1532
1533void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1534{
1535 if (ctx) {
1536 explicit_bzero(ctx,sizeof(*ctx));
1537 free(ctx);
1538 }
1539}
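For reference, the public entry points removed above compose in the usual
init / setiv / aad / encrypt / finish order. The following is an editorial
usage sketch, not part of the deleted file, assuming the customary
<openssl/aes.h> and <openssl/modes.h> declarations of AES_KEY, AES_encrypt,
block128_f and GCM128_CONTEXT; all key, IV and buffer names are placeholders.

#include <openssl/aes.h>
#include <openssl/modes.h>

static int
gcm_seal_example(unsigned char *ct, unsigned char tag[16],
    const unsigned char *pt, size_t ptlen,
    const unsigned char *aad, size_t aadlen,
    const unsigned char key[16], const unsigned char iv[12])
{
	AES_KEY aes;
	GCM128_CONTEXT *gcm;

	if (AES_set_encrypt_key(key, 128, &aes) != 0)
		return -1;
	if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
		return -1;

	CRYPTO_gcm128_setiv(gcm, iv, 12);	/* 96-bit IV fast path */
	if (CRYPTO_gcm128_aad(gcm, aad, aadlen) != 0 ||	/* AAD before payload */
	    CRYPTO_gcm128_encrypt(gcm, pt, ct, ptlen) != 0) {
		CRYPTO_gcm128_release(gcm);
		return -1;
	}
	CRYPTO_gcm128_tag(gcm, tag, 16);	/* or CRYPTO_gcm128_finish() to verify */
	CRYPTO_gcm128_release(gcm);
	return 0;
}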