diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2018-11-24 14:08:29 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2018-11-24 14:08:29 +0100 |
commit | 03569bc50f0d731aa3af94ab600adc59eaac3162 (patch) | |
tree | 229b93e3a0bdc0b82946fa421a96b39268159c2b | |
parent | 941440cf166ef77ad82c4ead9eae3a8a2552a418 (diff) | |
download | busybox-w32-03569bc50f0d731aa3af94ab600adc59eaac3162.tar.gz busybox-w32-03569bc50f0d731aa3af94ab600adc59eaac3162.tar.bz2 busybox-w32-03569bc50f0d731aa3af94ab600adc59eaac3162.zip |
tls: speed up xor'ing of aligned 16-byte buffers
function                                             old     new   delta
xorbuf_aligned_AES_BLOCK_SIZE                          -      23     +23
xwrite_encrypted                                     585     580      -5
aesgcm_GHASH                                         233     228      -5
GMULT                                                192     187      -5
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/3 up/down: 23/-15)              Total: 8 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | networking/tls.c | 34 | ||||
-rw-r--r-- | networking/tls.h | 4 | ||||
-rw-r--r-- | networking/tls_aesgcm.c | 15 |
3 files changed, 36 insertions, 17 deletions
diff --git a/networking/tls.c b/networking/tls.c index 1f8c21f8b..b774340ae 100644 --- a/networking/tls.c +++ b/networking/tls.c | |||
@@ -357,6 +357,20 @@ void FAST_FUNC xorbuf(void *dst, const void *src, unsigned count) | |||
357 | xorbuf3(dst, dst, src, count); | 357 | xorbuf3(dst, dst, src, count); |
358 | } | 358 | } |
359 | 359 | ||
360 | void FAST_FUNC xorbuf_aligned_AES_BLOCK_SIZE(void *dst, const void *src) | ||
361 | { | ||
362 | unsigned long *d = dst; | ||
363 | const unsigned long *s = src; | ||
364 | d[0] ^= s[0]; | ||
365 | #if ULONG_MAX <= 0xffffffffffffffff | ||
366 | d[1] ^= s[1]; | ||
367 | #if ULONG_MAX == 0xffffffff | ||
368 | d[2] ^= s[2]; | ||
369 | d[3] ^= s[3]; | ||
370 | #endif | ||
371 | #endif | ||
372 | } | ||
373 | |||
360 | /* Nondestructively see the current hash value */ | 374 | /* Nondestructively see the current hash value */ |
361 | static unsigned sha_peek(md5sha_ctx_t *ctx, void *buffer) | 375 | static unsigned sha_peek(md5sha_ctx_t *ctx, void *buffer) |
362 | { | 376 | { |
@@ -802,10 +816,10 @@ static void xwrite_encrypted_aesgcm(tls_state_t *tls, unsigned size, unsigned ty | |||
802 | { | 816 | { |
803 | #define COUNTER(v) (*(uint32_t*)(v + 12)) | 817 | #define COUNTER(v) (*(uint32_t*)(v + 12)) |
804 | 818 | ||
805 | uint8_t aad[13 + 3] ALIGNED(4); /* +3 creates [16] buffer, simplifying GHASH() */ | 819 | uint8_t aad[13 + 3] ALIGNED_long; /* +3 creates [16] buffer, simplifying GHASH() */ |
806 | uint8_t nonce[12 + 4] ALIGNED(4); /* +4 creates space for AES block counter */ | 820 | uint8_t nonce[12 + 4] ALIGNED_long; /* +4 creates space for AES block counter */ |
807 | uint8_t scratch[AES_BLOCK_SIZE] ALIGNED(4); //[16] | 821 | uint8_t scratch[AES_BLOCK_SIZE] ALIGNED_long; //[16] |
808 | uint8_t authtag[AES_BLOCK_SIZE] ALIGNED(4); //[16] | 822 | uint8_t authtag[AES_BLOCK_SIZE] ALIGNED_long; //[16] |
809 | uint8_t *buf; | 823 | uint8_t *buf; |
810 | struct record_hdr *xhdr; | 824 | struct record_hdr *xhdr; |
811 | unsigned remaining; | 825 | unsigned remaining; |
@@ -850,7 +864,7 @@ static void xwrite_encrypted_aesgcm(tls_state_t *tls, unsigned size, unsigned ty | |||
850 | aesgcm_GHASH(tls->H, aad, /*sizeof(aad),*/ tls->outbuf + OUTBUF_PFX, size, authtag /*, sizeof(authtag)*/); | 864 | aesgcm_GHASH(tls->H, aad, /*sizeof(aad),*/ tls->outbuf + OUTBUF_PFX, size, authtag /*, sizeof(authtag)*/); |
851 | COUNTER(nonce) = htonl(1); | 865 | COUNTER(nonce) = htonl(1); |
852 | aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch); | 866 | aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch); |
853 | xorbuf(authtag, scratch, sizeof(authtag)); | 867 | xorbuf_aligned_AES_BLOCK_SIZE(authtag, scratch); |
854 | 868 | ||
855 | memcpy(buf, authtag, sizeof(authtag)); | 869 | memcpy(buf, authtag, sizeof(authtag)); |
856 | #undef COUNTER | 870 | #undef COUNTER |
@@ -938,10 +952,10 @@ static void tls_aesgcm_decrypt(tls_state_t *tls, uint8_t *buf, int size) | |||
938 | { | 952 | { |
939 | #define COUNTER(v) (*(uint32_t*)(v + 12)) | 953 | #define COUNTER(v) (*(uint32_t*)(v + 12)) |
940 | 954 | ||
941 | //uint8_t aad[13 + 3] ALIGNED(4); /* +3 creates [16] buffer, simplifying GHASH() */ | 955 | //uint8_t aad[13 + 3] ALIGNED_long; /* +3 creates [16] buffer, simplifying GHASH() */ |
942 | uint8_t nonce[12 + 4] ALIGNED(4); /* +4 creates space for AES block counter */ | 956 | uint8_t nonce[12 + 4] ALIGNED_long; /* +4 creates space for AES block counter */ |
943 | uint8_t scratch[AES_BLOCK_SIZE] ALIGNED(4); //[16] | 957 | uint8_t scratch[AES_BLOCK_SIZE] ALIGNED_long; //[16] |
944 | //uint8_t authtag[AES_BLOCK_SIZE] ALIGNED(4); //[16] | 958 | //uint8_t authtag[AES_BLOCK_SIZE] ALIGNED_long; //[16] |
945 | unsigned remaining; | 959 | unsigned remaining; |
946 | unsigned cnt; | 960 | unsigned cnt; |
947 | 961 | ||
@@ -973,7 +987,7 @@ static void tls_aesgcm_decrypt(tls_state_t *tls, uint8_t *buf, int size) | |||
973 | //aesgcm_GHASH(tls->H, aad, tls->inbuf + RECHDR_LEN, size, authtag); | 987 | //aesgcm_GHASH(tls->H, aad, tls->inbuf + RECHDR_LEN, size, authtag); |
974 | //COUNTER(nonce) = htonl(1); | 988 | //COUNTER(nonce) = htonl(1); |
975 | //aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch); | 989 | //aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch); |
976 | //xorbuf(authtag, scratch, sizeof(authtag)); | 990 | //xorbuf_aligned_AES_BLOCK_SIZE(authtag, scratch); |
977 | 991 | ||
978 | //memcmp(buf, authtag, sizeof(authtag)) || DIE("HASH DOES NOT MATCH!"); | 992 | //memcmp(buf, authtag, sizeof(authtag)) || DIE("HASH DOES NOT MATCH!"); |
979 | #undef COUNTER | 993 | #undef COUNTER |
diff --git a/networking/tls.h b/networking/tls.h index 4b0dc7459..494ed78c4 100644 --- a/networking/tls.h +++ b/networking/tls.h | |||
@@ -81,8 +81,12 @@ typedef int16_t int16; | |||
81 | #define AES_BLOCK_SIZE 16 | 81 | #define AES_BLOCK_SIZE 16 |
82 | 82 | ||
83 | void tls_get_random(void *buf, unsigned len) FAST_FUNC; | 83 | void tls_get_random(void *buf, unsigned len) FAST_FUNC; |
84 | |||
84 | void xorbuf(void* buf, const void* mask, unsigned count) FAST_FUNC; | 85 | void xorbuf(void* buf, const void* mask, unsigned count) FAST_FUNC; |
85 | 86 | ||
87 | #define ALIGNED_long ALIGNED(sizeof(long)) | ||
88 | void xorbuf_aligned_AES_BLOCK_SIZE(void* buf, const void* mask) FAST_FUNC; | ||
89 | |||
86 | #define matrixCryptoGetPrngData(buf, len, userPtr) (tls_get_random(buf, len), PS_SUCCESS) | 90 | #define matrixCryptoGetPrngData(buf, len, userPtr) (tls_get_random(buf, len), PS_SUCCESS) |
87 | 91 | ||
88 | #define psFree(p, pool) free(p) | 92 | #define psFree(p, pool) free(p) |
diff --git a/networking/tls_aesgcm.c b/networking/tls_aesgcm.c index db720e5f6..fd72540c4 100644 --- a/networking/tls_aesgcm.c +++ b/networking/tls_aesgcm.c | |||
@@ -50,8 +50,8 @@ static void RIGHTSHIFTX(byte* x) | |||
50 | 50 | ||
51 | static void GMULT(byte* X, byte* Y) | 51 | static void GMULT(byte* X, byte* Y) |
52 | { | 52 | { |
53 | byte Z[AES_BLOCK_SIZE]; | 53 | byte Z[AES_BLOCK_SIZE] ALIGNED_long; |
54 | byte V[AES_BLOCK_SIZE]; | 54 | byte V[AES_BLOCK_SIZE] ALIGNED_long; |
55 | int i, j; | 55 | int i, j; |
56 | 56 | ||
57 | XMEMSET(Z, 0, AES_BLOCK_SIZE); | 57 | XMEMSET(Z, 0, AES_BLOCK_SIZE); |
@@ -62,7 +62,7 @@ static void GMULT(byte* X, byte* Y) | |||
62 | for (j = 0; j < 8; j++) | 62 | for (j = 0; j < 8; j++) |
63 | { | 63 | { |
64 | if (y & 0x80) { | 64 | if (y & 0x80) { |
65 | xorbuf(Z, V, AES_BLOCK_SIZE); | 65 | xorbuf_aligned_AES_BLOCK_SIZE(Z, V); |
66 | } | 66 | } |
67 | 67 | ||
68 | RIGHTSHIFTX(V); | 68 | RIGHTSHIFTX(V); |
@@ -86,8 +86,8 @@ void FAST_FUNC aesgcm_GHASH(byte* h, | |||
86 | byte* s //, unsigned sSz | 86 | byte* s //, unsigned sSz |
87 | ) | 87 | ) |
88 | { | 88 | { |
89 | byte x[AES_BLOCK_SIZE] ALIGNED(4); | 89 | byte x[AES_BLOCK_SIZE] ALIGNED_long; |
90 | byte scratch[AES_BLOCK_SIZE] ALIGNED(4); | 90 | byte scratch[AES_BLOCK_SIZE] ALIGNED_long; |
91 | word32 blocks, partial; | 91 | word32 blocks, partial; |
92 | //was: byte* h = aes->H; | 92 | //was: byte* h = aes->H; |
93 | 93 | ||
@@ -116,6 +116,7 @@ void FAST_FUNC aesgcm_GHASH(byte* h, | |||
116 | blocks = cSz / AES_BLOCK_SIZE; | 116 | blocks = cSz / AES_BLOCK_SIZE; |
117 | partial = cSz % AES_BLOCK_SIZE; | 117 | partial = cSz % AES_BLOCK_SIZE; |
118 | while (blocks--) { | 118 | while (blocks--) { |
119 | //xorbuf_aligned_AES_BLOCK_SIZE(x, c); - c is not guaranteed to be aligned | ||
119 | xorbuf(x, c, AES_BLOCK_SIZE); | 120 | xorbuf(x, c, AES_BLOCK_SIZE); |
120 | GMULT(x, h); | 121 | GMULT(x, h); |
121 | c += AES_BLOCK_SIZE; | 122 | c += AES_BLOCK_SIZE; |
@@ -124,7 +125,7 @@ void FAST_FUNC aesgcm_GHASH(byte* h, | |||
124 | //XMEMSET(scratch, 0, AES_BLOCK_SIZE); | 125 | //XMEMSET(scratch, 0, AES_BLOCK_SIZE); |
125 | //XMEMCPY(scratch, c, partial); | 126 | //XMEMCPY(scratch, c, partial); |
126 | //xorbuf(x, scratch, AES_BLOCK_SIZE); | 127 | //xorbuf(x, scratch, AES_BLOCK_SIZE); |
127 | xorbuf(x, c, partial); | 128 | xorbuf(x, c, partial);//same result as above |
128 | GMULT(x, h); | 129 | GMULT(x, h); |
129 | } | 130 | } |
130 | } | 131 | } |
@@ -132,7 +133,7 @@ void FAST_FUNC aesgcm_GHASH(byte* h, | |||
132 | /* Hash in the lengths of A and C in bits */ | 133 | /* Hash in the lengths of A and C in bits */ |
133 | FlattenSzInBits(&scratch[0], aSz); | 134 | FlattenSzInBits(&scratch[0], aSz); |
134 | FlattenSzInBits(&scratch[8], cSz); | 135 | FlattenSzInBits(&scratch[8], cSz); |
135 | xorbuf(x, scratch, AES_BLOCK_SIZE); | 136 | xorbuf_aligned_AES_BLOCK_SIZE(x, scratch); |
136 | GMULT(x, h); | 137 | GMULT(x, h); |
137 | 138 | ||
138 | /* Copy the result into s. */ | 139 | /* Copy the result into s. */ |