sha3: code shrink

function                                             old     new   delta
KeccakF                                             1064    1053     -11

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
author: Denys Vlasenko <vda.linux@googlemail.com> 2013-01-15 15:22:30 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2013-01-15 15:22:30 +0100
commit: a55df2793660941f42589182537d02ce54eaed66 (patch)
tree: fe4d2df5b05d5f78798c368ab77a5298d98af21b /libbb
parent: 07a54e21dd08bcd752a23095fdedc904eb7127fb (diff)
download: busybox-w32-a55df2793660941f42589182537d02ce54eaed66.tar.gz
busybox-w32-a55df2793660941f42589182537d02ce54eaed66.tar.bz2
busybox-w32-a55df2793660941f42589182537d02ce54eaed66.zip
1 files changed, 18 insertions, 13 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 3b1366762..a0eec7789 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -988,24 +988,29 @@ static void KeccakF(uint64_t *state)
        for (round = 0; round < cKeccakNumberOfRounds; ++round) {
                /* Theta */
                {
-                        uint64_t BC[5];
+                        uint64_t BC[10];
                        for (x = 0; x < 5; ++x) {
-                                BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^
-                                        state[15 + x] ^ state[20 + x];
+                                BC[x + 5] = BC[x] = state[x]
+                                        ^ state[x + 5] ^ state[x + 10]
+                                        ^ state[x + 15] ^ state[x + 20];
                        }
+                        /* Using 2x5 vector above eliminates the need to use
+                         * [Mod5[x+N]] index trick below to calculate (x+N) % 5,
+                         * and the code is a bit _smaller_.
+                         */
                        for (x = 0; x < 5; ++x) {
-                                uint64_t temp = BC[KeccakF_Mod5[x + 4]] ^
-                                        rotl64(BC[KeccakF_Mod5[x + 1]], 1);
+                                uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
                                if (SHA3_SMALL && !ARCH_IS_64BIT) {
                                        for (y = 0; y <= 20; y += 5)
-                                                state[y + x] ^= temp;
+                                                state[x + y] ^= temp;
                                } else {
-                                        /* on 64-bit arch, this is actually smaller too */
-                                        state[0 + x] ^= temp;
-                                        state[5 + x] ^= temp;
-                                        state[10 + x] ^= temp;
-                                        state[15 + x] ^= temp;
-                                        state[20 + x] ^= temp;
+                                        /* On 64-bit, this is also smaller,
+                                         * not only faster, than loop */
+                                        state[x] ^= temp;
+                                        state[x + 5] ^= temp;
+                                        state[x + 10] ^= temp;
+                                        state[x + 15] ^= temp;
+                                        state[x + 20] ^= temp;
                                }
                        }
                }
@@ -1019,7 +1024,7 @@ static void KeccakF(uint64_t *state)
                                t1 = t0;
                        }
                } else {
-                        /* Especially large benefit for 32-bit arch:
+                        /* Especially large benefit for 32-bit arch (75% faster):
                         * 64-bit rotations by non-constant usually are SLOW on those.
                         * We resort to unrolling here.
                         * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[],
author	Denys Vlasenko <vda.linux@googlemail.com>	2013-01-15 15:22:30 +0100
committer	Denys Vlasenko <vda.linux@googlemail.com>	2013-01-15 15:22:30 +0100
commit	a55df2793660941f42589182537d02ce54eaed66 (patch)
tree	fe4d2df5b05d5f78798c368ab77a5298d98af21b /libbb
parent	07a54e21dd08bcd752a23095fdedc904eb7127fb (diff)
download	busybox-w32-a55df2793660941f42589182537d02ce54eaed66.tar.gz busybox-w32-a55df2793660941f42589182537d02ce54eaed66.tar.bz2 busybox-w32-a55df2793660941f42589182537d02ce54eaed66.zip