aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2013-01-15 15:22:30 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2013-01-15 15:22:30 +0100
commit: a55df2793660941f42589182537d02ce54eaed66 (patch)
tree: fe4d2df5b05d5f78798c368ab77a5298d98af21b
parent: 07a54e21dd08bcd752a23095fdedc904eb7127fb (diff)
download: busybox-w32-a55df2793660941f42589182537d02ce54eaed66.tar.gz
busybox-w32-a55df2793660941f42589182537d02ce54eaed66.tar.bz2
busybox-w32-a55df2793660941f42589182537d02ce54eaed66.zip
sha3: code shrink
function                                             old     new   delta
KeccakF                                             1064    1053     -11

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- libbb/hash_md5_sha.c 31
1 files changed, 18 insertions, 13 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 3b1366762..a0eec7789 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -988,24 +988,29 @@ static void KeccakF(uint64_t *state)
988 for (round = 0; round < cKeccakNumberOfRounds; ++round) { 988 for (round = 0; round < cKeccakNumberOfRounds; ++round) {
989 /* Theta */ 989 /* Theta */
990 { 990 {
991 uint64_t BC[5]; 991 uint64_t BC[10];
992 for (x = 0; x < 5; ++x) { 992 for (x = 0; x < 5; ++x) {
993 BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^ 993 BC[x + 5] = BC[x] = state[x]
994 state[15 + x] ^ state[20 + x]; 994 ^ state[x + 5] ^ state[x + 10]
995 ^ state[x + 15] ^ state[x + 20];
995 } 996 }
997 /* Using 2x5 vector above eliminates the need to use
998 * [Mod5[x+N]] index trick below to calculate (x+N) % 5,
999 * and the code is a bit _smaller_.
1000 */
996 for (x = 0; x < 5; ++x) { 1001 for (x = 0; x < 5; ++x) {
997 uint64_t temp = BC[KeccakF_Mod5[x + 4]] ^ 1002 uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
998 rotl64(BC[KeccakF_Mod5[x + 1]], 1);
999 if (SHA3_SMALL && !ARCH_IS_64BIT) { 1003 if (SHA3_SMALL && !ARCH_IS_64BIT) {
1000 for (y = 0; y <= 20; y += 5) 1004 for (y = 0; y <= 20; y += 5)
1001 state[y + x] ^= temp; 1005 state[x + y] ^= temp;
1002 } else { 1006 } else {
1003 /* on 64-bit arch, this is actually smaller too */ 1007 /* On 64-bit, this is also smaller,
1004 state[0 + x] ^= temp; 1008 * not only faster, than loop */
1005 state[5 + x] ^= temp; 1009 state[x] ^= temp;
1006 state[10 + x] ^= temp; 1010 state[x + 5] ^= temp;
1007 state[15 + x] ^= temp; 1011 state[x + 10] ^= temp;
1008 state[20 + x] ^= temp; 1012 state[x + 15] ^= temp;
1013 state[x + 20] ^= temp;
1009 } 1014 }
1010 } 1015 }
1011 } 1016 }
@@ -1019,7 +1024,7 @@ static void KeccakF(uint64_t *state)
1019 t1 = t0; 1024 t1 = t0;
1020 } 1025 }
1021 } else { 1026 } else {
1022 /* Especially large benefit for 32-bit arch: 1027 /* Especially large benefit for 32-bit arch (75% faster):
1023 * 64-bit rotations by non-constant usually are SLOW on those. 1028 * 64-bit rotations by non-constant usually are SLOW on those.
1024 * We resort to unrolling here. 1029 * We resort to unrolling here.
1025 * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[], 1030 * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[],