diff options
| author | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-15 22:07:48 +0100 |
|---|---|---|
| committer | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-15 22:07:48 +0100 |
| commit | 8fb3ab528e1a640342c04d996e54f7fa668fdce6 (patch) | |
| tree | 18363c520b347cf85b215b01e63eb90dbb2bb6c2 /libbb | |
| parent | 8e7312e12fb088ba99f4f875903926f2ef9ed235 (diff) | |
| download | busybox-w32-8fb3ab528e1a640342c04d996e54f7fa668fdce6.tar.gz busybox-w32-8fb3ab528e1a640342c04d996e54f7fa668fdce6.tar.bz2 busybox-w32-8fb3ab528e1a640342c04d996e54f7fa668fdce6.zip | |
sha3: remove two "small code" codepaths: I can't reproduce code size win on them anymore
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
| -rw-r--r-- | libbb/hash_md5_sha.c | 61 |
1 file changed, 18 insertions, 43 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 7ae0b6385..60f44cc3d 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
| @@ -977,14 +977,12 @@ static const uint8_t KECCAK_PI_LANE[25] = { | |||
| 977 | 14, 22, 9, 6, 1 | 977 | 14, 22, 9, 6, 1 |
| 978 | }; | 978 | }; |
| 979 | 979 | ||
| 980 | static const uint8_t MOD5[10] = { | ||
| 981 | 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 | ||
| 982 | }; | ||
| 983 | |||
| 984 | #define ARCH_IS_64BIT (sizeof(long) >= sizeof(uint64_t)) | 980 | #define ARCH_IS_64BIT (sizeof(long) >= sizeof(uint64_t)) |
| 985 | 981 | ||
| 986 | static void KeccakF(uint64_t *state) | 982 | static void KeccakF(uint64_t *state) |
| 987 | { | 983 | { |
| 984 | /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 };*/ | ||
| 985 | |||
| 988 | unsigned x, y; | 986 | unsigned x, y; |
| 989 | unsigned round; | 987 | unsigned round; |
| 990 | 988 | ||
| @@ -1009,18 +1007,11 @@ static void KeccakF(uint64_t *state) | |||
| 1009 | */ | 1007 | */ |
| 1010 | for (x = 0; x < 5; ++x) { | 1008 | for (x = 0; x < 5; ++x) { |
| 1011 | uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1); | 1009 | uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1); |
| 1012 | if (SHA3_SMALL && !ARCH_IS_64BIT) { | 1010 | state[x] ^= temp; |
| 1013 | for (y = 0; y <= 20; y += 5) | 1011 | state[x + 5] ^= temp; |
| 1014 | state[x + y] ^= temp; | 1012 | state[x + 10] ^= temp; |
| 1015 | } else { | 1013 | state[x + 15] ^= temp; |
| 1016 | /* On 64-bit, this is also smaller, | 1014 | state[x + 20] ^= temp; |
| 1017 | * not only faster, than loop */ | ||
| 1018 | state[x] ^= temp; | ||
| 1019 | state[x + 5] ^= temp; | ||
| 1020 | state[x + 10] ^= temp; | ||
| 1021 | state[x + 15] ^= temp; | ||
| 1022 | state[x + 20] ^= temp; | ||
| 1023 | } | ||
| 1024 | } | 1015 | } |
| 1025 | } | 1016 | } |
| 1026 | 1017 | ||
| @@ -1057,33 +1048,17 @@ static void KeccakF(uint64_t *state) | |||
| 1057 | 1048 | ||
| 1058 | /* Chi */ | 1049 | /* Chi */ |
| 1059 | for (y = 0; y <= 20; y += 5) { | 1050 | for (y = 0; y <= 20; y += 5) { |
| 1060 | if (SHA3_SMALL && !ARCH_IS_64BIT) { | 1051 | uint64_t BC0, BC1, BC2, BC3, BC4; |
| 1061 | uint64_t BC[5]; | 1052 | BC0 = state[y + 0]; |
| 1062 | BC[0] = state[y + 0]; | 1053 | BC1 = state[y + 1]; |
| 1063 | BC[1] = state[y + 1]; | 1054 | BC2 = state[y + 2]; |
| 1064 | BC[2] = state[y + 2]; | 1055 | state[y + 0] = BC0 ^ ((~BC1) & BC2); |
| 1065 | BC[3] = state[y + 3]; | 1056 | BC3 = state[y + 3]; |
| 1066 | BC[4] = state[y + 4]; | 1057 | state[y + 1] = BC1 ^ ((~BC2) & BC3); |
| 1067 | for (x = 0; x < 5; ++x) { | 1058 | BC4 = state[y + 4]; |
| 1068 | state[y + x] = | 1059 | state[y + 2] = BC2 ^ ((~BC3) & BC4); |
| 1069 | BC[x] ^ ((~BC[MOD5[x + 1]]) & | 1060 | state[y + 3] = BC3 ^ ((~BC4) & BC0); |
| 1070 | BC[MOD5[x + 2]]); | 1061 | state[y + 4] = BC4 ^ ((~BC0) & BC1); |
| 1071 | } | ||
| 1072 | } else { | ||
| 1073 | /* 32-bit x86: +50 bytes code, 10% faster */ | ||
| 1074 | /* 64-bit x86: ~same code size, 30% faster */ | ||
| 1075 | uint64_t BC0, BC1, BC2, BC3, BC4; | ||
| 1076 | BC0 = state[y + 0]; | ||
| 1077 | BC1 = state[y + 1]; | ||
| 1078 | BC2 = state[y + 2]; | ||
| 1079 | state[y + 0] = BC0 ^ ((~BC1) & BC2); | ||
| 1080 | BC3 = state[y + 3]; | ||
| 1081 | state[y + 1] = BC1 ^ ((~BC2) & BC3); | ||
| 1082 | BC4 = state[y + 4]; | ||
| 1083 | state[y + 2] = BC2 ^ ((~BC3) & BC4); | ||
| 1084 | state[y + 3] = BC3 ^ ((~BC4) & BC0); | ||
| 1085 | state[y + 4] = BC4 ^ ((~BC0) & BC1); | ||
| 1086 | } | ||
| 1087 | } | 1062 | } |
| 1088 | 1063 | ||
| 1089 | /* Iota */ | 1064 | /* Iota */ |
