diff options
| author | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-15 15:22:30 +0100 |
|---|---|---|
| committer | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-15 15:22:30 +0100 |
| commit | a55df2793660941f42589182537d02ce54eaed66 (patch) | |
| tree | fe4d2df5b05d5f78798c368ab77a5298d98af21b /libbb | |
| parent | 07a54e21dd08bcd752a23095fdedc904eb7127fb (diff) | |
| download | busybox-w32-a55df2793660941f42589182537d02ce54eaed66.tar.gz busybox-w32-a55df2793660941f42589182537d02ce54eaed66.tar.bz2 busybox-w32-a55df2793660941f42589182537d02ce54eaed66.zip | |
sha3: code shrink
function old new delta
KeccakF 1064 1053 -11
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
| -rw-r--r-- | libbb/hash_md5_sha.c | 31 |
1 files changed, 18 insertions, 13 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 3b1366762..a0eec7789 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
| @@ -988,24 +988,29 @@ static void KeccakF(uint64_t *state) | |||
| 988 | for (round = 0; round < cKeccakNumberOfRounds; ++round) { | 988 | for (round = 0; round < cKeccakNumberOfRounds; ++round) { |
| 989 | /* Theta */ | 989 | /* Theta */ |
| 990 | { | 990 | { |
| 991 | uint64_t BC[5]; | 991 | uint64_t BC[10]; |
| 992 | for (x = 0; x < 5; ++x) { | 992 | for (x = 0; x < 5; ++x) { |
| 993 | BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^ | 993 | BC[x + 5] = BC[x] = state[x] |
| 994 | state[15 + x] ^ state[20 + x]; | 994 | ^ state[x + 5] ^ state[x + 10] |
| 995 | ^ state[x + 15] ^ state[x + 20]; | ||
| 995 | } | 996 | } |
| 997 | /* Using 2x5 vector above eliminates the need to use | ||
| 998 | * [Mod5[x+N]] index trick below to calculate (x+N) % 5, | ||
| 999 | * and the code is a bit _smaller_. | ||
| 1000 | */ | ||
| 996 | for (x = 0; x < 5; ++x) { | 1001 | for (x = 0; x < 5; ++x) { |
| 997 | uint64_t temp = BC[KeccakF_Mod5[x + 4]] ^ | 1002 | uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1); |
| 998 | rotl64(BC[KeccakF_Mod5[x + 1]], 1); | ||
| 999 | if (SHA3_SMALL && !ARCH_IS_64BIT) { | 1003 | if (SHA3_SMALL && !ARCH_IS_64BIT) { |
| 1000 | for (y = 0; y <= 20; y += 5) | 1004 | for (y = 0; y <= 20; y += 5) |
| 1001 | state[y + x] ^= temp; | 1005 | state[x + y] ^= temp; |
| 1002 | } else { | 1006 | } else { |
| 1003 | /* on 64-bit arch, this is actually smaller too */ | 1007 | /* On 64-bit, this is also smaller, |
| 1004 | state[0 + x] ^= temp; | 1008 | * not only faster, than loop */ |
| 1005 | state[5 + x] ^= temp; | 1009 | state[x] ^= temp; |
| 1006 | state[10 + x] ^= temp; | 1010 | state[x + 5] ^= temp; |
| 1007 | state[15 + x] ^= temp; | 1011 | state[x + 10] ^= temp; |
| 1008 | state[20 + x] ^= temp; | 1012 | state[x + 15] ^= temp; |
| 1013 | state[x + 20] ^= temp; | ||
| 1009 | } | 1014 | } |
| 1010 | } | 1015 | } |
| 1011 | } | 1016 | } |
| @@ -1019,7 +1024,7 @@ static void KeccakF(uint64_t *state) | |||
| 1019 | t1 = t0; | 1024 | t1 = t0; |
| 1020 | } | 1025 | } |
| 1021 | } else { | 1026 | } else { |
| 1022 | /* Especially large benefit for 32-bit arch: | 1027 | /* Especially large benefit for 32-bit arch (75% faster): |
| 1023 | * 64-bit rotations by non-constant usually are SLOW on those. | 1028 | * 64-bit rotations by non-constant usually are SLOW on those. |
| 1024 | * We resort to unrolling here. | 1029 | * We resort to unrolling here. |
| 1025 | * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[], | 1030 | * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[], |
