diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-15 22:07:48 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-15 22:07:48 +0100 |
commit | 8fb3ab528e1a640342c04d996e54f7fa668fdce6 (patch) | |
tree | 18363c520b347cf85b215b01e63eb90dbb2bb6c2 | |
parent | 8e7312e12fb088ba99f4f875903926f2ef9ed235 (diff) | |
download | busybox-w32-8fb3ab528e1a640342c04d996e54f7fa668fdce6.tar.gz busybox-w32-8fb3ab528e1a640342c04d996e54f7fa668fdce6.tar.bz2 busybox-w32-8fb3ab528e1a640342c04d996e54f7fa668fdce6.zip |
sha3: remove two "small code" codepaths: I can't reproduce a code size win with them anymore
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/hash_md5_sha.c | 61 |
1 file changed, 18 insertions, 43 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 7ae0b6385..60f44cc3d 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c | |||
@@ -977,14 +977,12 @@ static const uint8_t KECCAK_PI_LANE[25] = { | |||
977 | 14, 22, 9, 6, 1 | 977 | 14, 22, 9, 6, 1 |
978 | }; | 978 | }; |
979 | 979 | ||
980 | static const uint8_t MOD5[10] = { | ||
981 | 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 | ||
982 | }; | ||
983 | |||
984 | #define ARCH_IS_64BIT (sizeof(long) >= sizeof(uint64_t)) | 980 | #define ARCH_IS_64BIT (sizeof(long) >= sizeof(uint64_t)) |
985 | 981 | ||
986 | static void KeccakF(uint64_t *state) | 982 | static void KeccakF(uint64_t *state) |
987 | { | 983 | { |
984 | /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 };*/ | ||
985 | |||
988 | unsigned x, y; | 986 | unsigned x, y; |
989 | unsigned round; | 987 | unsigned round; |
990 | 988 | ||
@@ -1009,18 +1007,11 @@ static void KeccakF(uint64_t *state) | |||
1009 | */ | 1007 | */ |
1010 | for (x = 0; x < 5; ++x) { | 1008 | for (x = 0; x < 5; ++x) { |
1011 | uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1); | 1009 | uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1); |
1012 | if (SHA3_SMALL && !ARCH_IS_64BIT) { | 1010 | state[x] ^= temp; |
1013 | for (y = 0; y <= 20; y += 5) | 1011 | state[x + 5] ^= temp; |
1014 | state[x + y] ^= temp; | 1012 | state[x + 10] ^= temp; |
1015 | } else { | 1013 | state[x + 15] ^= temp; |
1016 | /* On 64-bit, this is also smaller, | 1014 | state[x + 20] ^= temp; |
1017 | * not only faster, than loop */ | ||
1018 | state[x] ^= temp; | ||
1019 | state[x + 5] ^= temp; | ||
1020 | state[x + 10] ^= temp; | ||
1021 | state[x + 15] ^= temp; | ||
1022 | state[x + 20] ^= temp; | ||
1023 | } | ||
1024 | } | 1015 | } |
1025 | } | 1016 | } |
1026 | 1017 | ||
@@ -1057,33 +1048,17 @@ static void KeccakF(uint64_t *state) | |||
1057 | 1048 | ||
1058 | /* Chi */ | 1049 | /* Chi */ |
1059 | for (y = 0; y <= 20; y += 5) { | 1050 | for (y = 0; y <= 20; y += 5) { |
1060 | if (SHA3_SMALL && !ARCH_IS_64BIT) { | 1051 | uint64_t BC0, BC1, BC2, BC3, BC4; |
1061 | uint64_t BC[5]; | 1052 | BC0 = state[y + 0]; |
1062 | BC[0] = state[y + 0]; | 1053 | BC1 = state[y + 1]; |
1063 | BC[1] = state[y + 1]; | 1054 | BC2 = state[y + 2]; |
1064 | BC[2] = state[y + 2]; | 1055 | state[y + 0] = BC0 ^ ((~BC1) & BC2); |
1065 | BC[3] = state[y + 3]; | 1056 | BC3 = state[y + 3]; |
1066 | BC[4] = state[y + 4]; | 1057 | state[y + 1] = BC1 ^ ((~BC2) & BC3); |
1067 | for (x = 0; x < 5; ++x) { | 1058 | BC4 = state[y + 4]; |
1068 | state[y + x] = | 1059 | state[y + 2] = BC2 ^ ((~BC3) & BC4); |
1069 | BC[x] ^ ((~BC[MOD5[x + 1]]) & | 1060 | state[y + 3] = BC3 ^ ((~BC4) & BC0); |
1070 | BC[MOD5[x + 2]]); | 1061 | state[y + 4] = BC4 ^ ((~BC0) & BC1); |
1071 | } | ||
1072 | } else { | ||
1073 | /* 32-bit x86: +50 bytes code, 10% faster */ | ||
1074 | /* 64-bit x86: ~same code size, 30% faster */ | ||
1075 | uint64_t BC0, BC1, BC2, BC3, BC4; | ||
1076 | BC0 = state[y + 0]; | ||
1077 | BC1 = state[y + 1]; | ||
1078 | BC2 = state[y + 2]; | ||
1079 | state[y + 0] = BC0 ^ ((~BC1) & BC2); | ||
1080 | BC3 = state[y + 3]; | ||
1081 | state[y + 1] = BC1 ^ ((~BC2) & BC3); | ||
1082 | BC4 = state[y + 4]; | ||
1083 | state[y + 2] = BC2 ^ ((~BC3) & BC4); | ||
1084 | state[y + 3] = BC3 ^ ((~BC4) & BC0); | ||
1085 | state[y + 4] = BC4 ^ ((~BC0) & BC1); | ||
1086 | } | ||
1087 | } | 1062 | } |
1088 | 1063 | ||
1089 | /* Iota */ | 1064 | /* Iota */ |