diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-15 15:22:30 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-15 15:22:30 +0100 |
commit | a55df2793660941f42589182537d02ce54eaed66 (patch) | |
tree | fe4d2df5b05d5f78798c368ab77a5298d98af21b | |
parent | 07a54e21dd08bcd752a23095fdedc904eb7127fb (diff) | |
download | busybox-w32-a55df2793660941f42589182537d02ce54eaed66.tar.gz busybox-w32-a55df2793660941f42589182537d02ce54eaed66.tar.bz2 busybox-w32-a55df2793660941f42589182537d02ce54eaed66.zip |
sha3: code shrink
function old new delta
KeccakF 1064 1053 -11
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/hash_md5_sha.c | 31 |
1 files changed, 18 insertions, 13 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 3b1366762..a0eec7789 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c | |||
@@ -988,24 +988,29 @@ static void KeccakF(uint64_t *state) | |||
988 | for (round = 0; round < cKeccakNumberOfRounds; ++round) { | 988 | for (round = 0; round < cKeccakNumberOfRounds; ++round) { |
989 | /* Theta */ | 989 | /* Theta */ |
990 | { | 990 | { |
991 | uint64_t BC[5]; | 991 | uint64_t BC[10]; |
992 | for (x = 0; x < 5; ++x) { | 992 | for (x = 0; x < 5; ++x) { |
993 | BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^ | 993 | BC[x + 5] = BC[x] = state[x] |
994 | state[15 + x] ^ state[20 + x]; | 994 | ^ state[x + 5] ^ state[x + 10] |
995 | ^ state[x + 15] ^ state[x + 20]; | ||
995 | } | 996 | } |
997 | /* Using 2x5 vector above eliminates the need to use | ||
998 | * [Mod5[x+N]] index trick below to calculate (x+N) % 5, | ||
999 | * and the code is a bit _smaller_. | ||
1000 | */ | ||
996 | for (x = 0; x < 5; ++x) { | 1001 | for (x = 0; x < 5; ++x) { |
997 | uint64_t temp = BC[KeccakF_Mod5[x + 4]] ^ | 1002 | uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1); |
998 | rotl64(BC[KeccakF_Mod5[x + 1]], 1); | ||
999 | if (SHA3_SMALL && !ARCH_IS_64BIT) { | 1003 | if (SHA3_SMALL && !ARCH_IS_64BIT) { |
1000 | for (y = 0; y <= 20; y += 5) | 1004 | for (y = 0; y <= 20; y += 5) |
1001 | state[y + x] ^= temp; | 1005 | state[x + y] ^= temp; |
1002 | } else { | 1006 | } else { |
1003 | /* on 64-bit arch, this is actually smaller too */ | 1007 | /* On 64-bit, this is also smaller, |
1004 | state[0 + x] ^= temp; | 1008 | * not only faster, than loop */ |
1005 | state[5 + x] ^= temp; | 1009 | state[x] ^= temp; |
1006 | state[10 + x] ^= temp; | 1010 | state[x + 5] ^= temp; |
1007 | state[15 + x] ^= temp; | 1011 | state[x + 10] ^= temp; |
1008 | state[20 + x] ^= temp; | 1012 | state[x + 15] ^= temp; |
1013 | state[x + 20] ^= temp; | ||
1009 | } | 1014 | } |
1010 | } | 1015 | } |
1011 | } | 1016 | } |
@@ -1019,7 +1024,7 @@ static void KeccakF(uint64_t *state) | |||
1019 | t1 = t0; | 1024 | t1 = t0; |
1020 | } | 1025 | } |
1021 | } else { | 1026 | } else { |
1022 | /* Especially large benefit for 32-bit arch: | 1027 | /* Especially large benefit for 32-bit arch (75% faster): |
1023 | * 64-bit rotations by non-constant usually are SLOW on those. | 1028 | * 64-bit rotations by non-constant usually are SLOW on those. |
1024 | * We resort to unrolling here. | 1029 | * We resort to unrolling here. |
1025 | * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[], | 1030 | * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[], |