From ecc9090cfcccf412288147f385808f8f9df97ebe Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 23 Nov 2018 18:31:26 +0100
Subject: tls: simplify aesgcm_GHASH()

function                                             old     new   delta
xwrite_encrypted                                     604     599      -5
FlattenSzInBits                                       52       -     -52
aesgcm_GHASH                                         395     262    -133
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 0/2 up/down: 0/-190)           Total: -190 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls.c        | 30 ++++++++++----------
 networking/tls_aesgcm.c | 73 +++++++++++++++++++++++++++++--------------------
 networking/tls_aesgcm.h |  4 +--
 3 files changed, 60 insertions(+), 47 deletions(-)

diff --git a/networking/tls.c b/networking/tls.c
index 23622d76e..3b4f1b7e2 100644
--- a/networking/tls.c
+++ b/networking/tls.c
@@ -270,7 +270,7 @@ struct record_hdr {
 enum {
 	NEED_EC_KEY            = 1 << 0,
 	GOT_CERT_RSA_KEY_ALG   = 1 << 1,
-        GOT_CERT_ECDSA_KEY_ALG = 1 << 2,
+	GOT_CERT_ECDSA_KEY_ALG = 1 << 2,
 	GOT_EC_KEY             = 1 << 3,
 	ENCRYPTION_AESGCM      = 1 << 4,
 };
@@ -756,7 +756,6 @@ static void xwrite_encrypted_and_hmac_signed(tls_state_t *tls, unsigned size, un
 	} while ((size & (AES_BLOCK_SIZE - 1)) != 0);
 
 	/* Encrypt content+MAC+padding in place */
-//optimize key setup
 	aes_cbc_encrypt(
 		&tls->aes_decrypt, /* selects 128/256 */
 		buf - AES_BLOCK_SIZE, /* IV */
@@ -787,8 +786,9 @@ static void xwrite_encrypted_and_hmac_signed(tls_state_t *tls, unsigned size, un
  */
 static void xwrite_encrypted_aesgcm(tls_state_t *tls, unsigned size, unsigned type)
 {
-//go for [16]
-	uint8_t aad[13];
+#define COUNTER(v) (*(uint32_t*)(v + 12))
+
+	uint8_t aad[13 + 3];   /* +3 creates [16] buffer, simplifying GHASH() */
 	uint8_t nonce[12 + 4]; /* +4 creates space for AES block counter */
 	uint8_t scratch[AES_BLOCK_SIZE]; //[16]
 	uint8_t authtag[AES_BLOCK_SIZE]; //[16]
@@ -807,7 +807,8 @@ static void xwrite_encrypted_aesgcm(tls_state_t *tls, unsigned size, unsigned ty
 	aad[9] = TLS_MAJ;
 	aad[10] = TLS_MIN;
 	aad[11] = size >> 8;
-	aad[12] = size & 0xff;
+	/* set aad[12], and clear aad[13..15] */
+	COUNTER(aad) = SWAP_LE32(size & 0xff);
 
 	memcpy(nonce,     tls->client_write_IV, 4);
 	memcpy(nonce + 4, &tls->write_seq64_be, 8);
@@ -817,8 +818,6 @@ static void xwrite_encrypted_aesgcm(tls_state_t *tls, unsigned size, unsigned ty
 	/* seq64 is not used later in this func, can increment here */
 	tls->write_seq64_be = SWAP_BE64(1 + SWAP_BE64(tls->write_seq64_be));
 
-#define COUNTER(v) (*(uint32_t*)(v + 12))
-
 	cnt = 1;
 	remaining = size;
 	while (remaining != 0) {
@@ -833,8 +832,7 @@ static void xwrite_encrypted_aesgcm(tls_state_t *tls, unsigned size, unsigned ty
 		remaining -= n;
 	}
 
-//optimize fixed sizes
-	aesgcm_GHASH(tls->H, aad, sizeof(aad), tls->outbuf + OUTBUF_PFX, size, authtag, sizeof(authtag));
+	aesgcm_GHASH(tls->H, aad, /*sizeof(aad),*/ tls->outbuf + OUTBUF_PFX, size, authtag /*, sizeof(authtag)*/);
 	COUNTER(nonce) = htonl(1);
 	aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch);
 	xorbuf(authtag, scratch, sizeof(authtag));
@@ -923,8 +921,9 @@ static const char *alert_text(int code)
 
 static void tls_aesgcm_decrypt(tls_state_t *tls, uint8_t *buf, int size)
 {
-//go for [16]
-	//uint8_t aad[13];
+#define COUNTER(v) (*(uint32_t*)(v + 12))
+
+	//uint8_t aad[13 + 3]; /* +3 creates [16] buffer, simplifying GHASH() */
 	uint8_t nonce[12 + 4]; /* +4 creates space for AES block counter */
 	uint8_t scratch[AES_BLOCK_SIZE]; //[16]
 	//uint8_t authtag[AES_BLOCK_SIZE]; //[16]
@@ -935,14 +934,14 @@ static void tls_aesgcm_decrypt(tls_state_t *tls, uint8_t *buf, int size)
 	//aad[9] = TLS_MAJ;
 	//aad[10] = TLS_MIN;
 	//aad[11] = size >> 8;
-	//aad[12] = size & 0xff;
+	///* set aad[12], and clear aad[13..15] */
+	//COUNTER(aad) = SWAP_LE32(size & 0xff);
 
+	//memcpy(aad,       &tls->write_seq64_be, 8);
 	memcpy(nonce,     tls->server_write_IV, 4);
 	memcpy(nonce + 4, buf, 8);
 	buf += 8;
 
-#define COUNTER(v) (*(uint32_t*)(v + 12))
-
 	cnt = 1;
 	remaining = size;
 	while (remaining != 0) {
@@ -957,8 +956,7 @@ static void tls_aesgcm_decrypt(tls_state_t *tls, uint8_t *buf, int size)
 		remaining -= n;
 	}
 
-////optimize fixed sizes
-	//aesgcm_GHASH(tls->H, aad, sizeof(aad), tls->outbuf + OUTBUF_PFX, size, authtag, sizeof(authtag));
+	//aesgcm_GHASH(tls->H, aad, tls->outbuf + OUTBUF_PFX, size, authtag);
 	//COUNTER(nonce) = htonl(1);
 	//aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch);
 	//xorbuf(authtag, scratch, sizeof(authtag));
diff --git a/networking/tls_aesgcm.c b/networking/tls_aesgcm.c
index eb32f4c05..1a7ddb2e2 100644
--- a/networking/tls_aesgcm.c
+++ b/networking/tls_aesgcm.c
@@ -25,23 +25,26 @@ void FAST_FUNC xorbuf(void* buf, const void* mask, unsigned count)
         b[i] ^= m[i];
 }
 
-/* wolfssl-3.15.3/wolfcrypt/src/aes.c */
+/* from wolfssl-3.15.3/wolfcrypt/src/aes.c */
 
-static void FlattenSzInBits(byte* buf, word32 sz)
+static ALWAYS_INLINE void FlattenSzInBits(byte* buf, word32 sz)
 {
     /* Multiply the sz by 8 */
-    word32 szHi = (sz >> (8*sizeof(sz) - 3));
+//bbox: these sizes are never even close to 2^32/8
+//    word32 szHi = (sz >> (8*sizeof(sz) - 3));
     sz <<= 3;
 
     /* copy over the words of the sz into the destination buffer */
-    buf[0] = (szHi >> 24) & 0xff;
-    buf[1] = (szHi >> 16) & 0xff;
-    buf[2] = (szHi >>  8) & 0xff;
-    buf[3] = szHi & 0xff;
-    buf[4] = (sz >> 24) & 0xff;
-    buf[5] = (sz >> 16) & 0xff;
-    buf[6] = (sz >>  8) & 0xff;
-    buf[7] = sz & 0xff;
+//    buf[0] = (szHi >> 24) & 0xff;
+//    buf[1] = (szHi >> 16) & 0xff;
+//    buf[2] = (szHi >>  8) & 0xff;
+//    buf[3] = szHi & 0xff;
+    move_to_unaligned32(buf, 0);
+//    buf[4] = (sz >> 24) & 0xff;
+//    buf[5] = (sz >> 16) & 0xff;
+//    buf[6] = (sz >>  8) & 0xff;
+//    buf[7] = sz & 0xff;
+    move_to_unaligned32(buf + 4, SWAP_BE32(sz));
 }
 
 static void RIGHTSHIFTX(byte* x)
@@ -83,35 +86,47 @@ static void GMULT(byte* X, byte* Y)
     XMEMCPY(X, Z, AES_BLOCK_SIZE);
 }
 
-void FAST_FUNC aesgcm_GHASH(byte* h, const byte* a, unsigned aSz, const byte* c,
-    unsigned cSz, byte* s, unsigned sSz)
+//bbox:
+// for TLS AES-GCM, a (which is the AAD) is always 13 bytes long, and bbox code provides
+// extra 3 zeroed bytes, making it a[16], or a[AES_BLOCK_SIZE].
+// Resulting auth tag in s is also always AES_BLOCK_SIZE bytes.
+//
+// This allows some simplifications.
+#define aSz AES_BLOCK_SIZE
+#define sSz AES_BLOCK_SIZE
+void FAST_FUNC aesgcm_GHASH(byte* h,
+    const byte* a, //unsigned aSz,
+    const byte* c, unsigned cSz,
+    byte* s //, unsigned sSz
+)
 {
     byte x[AES_BLOCK_SIZE];
     byte scratch[AES_BLOCK_SIZE];
     word32 blocks, partial;
     //was: byte* h = aes->H;
 
-    XMEMSET(x, 0, AES_BLOCK_SIZE);
+    //XMEMSET(x, 0, AES_BLOCK_SIZE);
 
     /* Hash in A, the Additional Authentication Data */
-    if (aSz != 0 && a != NULL) {
-        blocks = aSz / AES_BLOCK_SIZE;
-        partial = aSz % AES_BLOCK_SIZE;
-        while (blocks--) {
-            xorbuf(x, a, AES_BLOCK_SIZE);
-            GMULT(x, h);
-            a += AES_BLOCK_SIZE;
-        }
-        if (partial != 0) {
-            XMEMSET(scratch, 0, AES_BLOCK_SIZE);
-            XMEMCPY(scratch, a, partial);
-            xorbuf(x, scratch, AES_BLOCK_SIZE);
+//    if (aSz != 0 && a != NULL) {
+//        blocks = aSz / AES_BLOCK_SIZE;
+//        partial = aSz % AES_BLOCK_SIZE;
+//        while (blocks--) {
+            //xorbuf(x, a, AES_BLOCK_SIZE);
+            XMEMCPY(x, a, AES_BLOCK_SIZE);// memcpy(x,a) = memset(x,0)+xorbuf(x,a)
             GMULT(x, h);
-        }
-    }
+//            a += AES_BLOCK_SIZE;
+//        }
+//        if (partial != 0) {
+//            XMEMSET(scratch, 0, AES_BLOCK_SIZE);
+//            XMEMCPY(scratch, a, partial);
+//            xorbuf(x, scratch, AES_BLOCK_SIZE);
+//            GMULT(x, h);
+//        }
+//    }
 
     /* Hash in C, the Ciphertext */
-    if (cSz != 0 && c != NULL) {
+    if (cSz != 0 /*&& c != NULL*/) {
         blocks = cSz / AES_BLOCK_SIZE;
         partial = cSz % AES_BLOCK_SIZE;
         while (blocks--) {
diff --git a/networking/tls_aesgcm.h b/networking/tls_aesgcm.h
index a71eced54..75694f3fa 100644
--- a/networking/tls_aesgcm.h
+++ b/networking/tls_aesgcm.h
@@ -7,7 +7,7 @@
 void xorbuf(void* buf, const void* mask, unsigned count) FAST_FUNC;
 
 void aesgcm_GHASH(uint8_t* h,
-	const uint8_t* a, unsigned aSz,
+	const uint8_t* a, //unsigned aSz,
 	const uint8_t* c, unsigned cSz,
-	uint8_t* s, unsigned sSz
+	uint8_t* s //, unsigned sSz
 ) FAST_FUNC;
-- 
cgit v1.2.3-55-g6feb