diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-15 01:12:26 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-15 01:12:26 +0100 |
commit | 30a8652fbf16884490cee4a624f039a9ab587269 (patch) | |
tree | cf6235392344386a06849e04fd5deb4f364f0a71 /libbb | |
parent | 60cb48ca50fcff24aa6c3927f51e4a508fa118f4 (diff) | |
download | busybox-w32-30a8652fbf16884490cee4a624f039a9ab587269.tar.gz busybox-w32-30a8652fbf16884490cee4a624f039a9ab587269.tar.bz2 busybox-w32-30a8652fbf16884490cee4a624f039a9ab587269.zip |
sha3: make size/speed optimization decision configurable
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
-rw-r--r-- | libbb/Config.src | 10 | ||||
-rw-r--r-- | libbb/hash_md5_sha.c | 77 |
2 files changed, 68 insertions, 19 deletions
diff --git a/libbb/Config.src b/libbb/Config.src index ee1b66a45..19021fed1 100644 --- a/libbb/Config.src +++ b/libbb/Config.src | |||
@@ -28,6 +28,16 @@ config MD5_SMALL | |||
28 | 2 3.0 5088 | 28 | 2 3.0 5088 |
29 | 3 (smallest) 5.1 4912 | 29 | 3 (smallest) 5.1 4912 |
30 | 30 | ||
31 | config SHA3_SMALL | ||
32 | int "SHA3: Trade bytes for speed (0:fast, 1:slow)" | ||
33 | default 1 | ||
34 | range 0 1 | ||
35 | help | ||
36 | Trade binary size versus speed for the sha3sum algorithm. | ||
37 | SHA3_SMALL=0 compared to SHA3_SMALL=1 (approximate): | ||
38 | 64-bit x86: +270 bytes of code, 45% faster | ||
39 | 32-bit x86: +450 bytes of code, 75% faster | ||
40 | |||
31 | config FEATURE_FAST_TOP | 41 | config FEATURE_FAST_TOP |
32 | bool "Faster /proc scanning code (+100 bytes)" | 42 | bool "Faster /proc scanning code (+100 bytes)" |
33 | default y | 43 | default y |
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 06a2400b5..643cf205f 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c | |||
@@ -918,6 +918,16 @@ void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf) | |||
918 | * Busybox modifications (C) Lauri Kasanen, under the GPLv2. | 918 | * Busybox modifications (C) Lauri Kasanen, under the GPLv2. |
919 | */ | 919 | */ |
920 | 920 | ||
921 | #if CONFIG_SHA3_SMALL < 0 | ||
922 | # define SHA3_SMALL 0 | ||
923 | #elif CONFIG_SHA3_SMALL > 1 | ||
924 | # define SHA3_SMALL 1 | ||
925 | #else | ||
926 | # define SHA3_SMALL CONFIG_SHA3_SMALL | ||
927 | #endif | ||
928 | |||
929 | #define ARCH_IS_64BIT (sizeof(long) >= sizeof(uint64_t)) | ||
930 | |||
921 | enum { | 931 | enum { |
922 | cKeccakR_SizeInBytes = 576 / 8, | 932 | cKeccakR_SizeInBytes = 576 / 8, |
923 | cKeccakNumberOfRounds = 24, | 933 | cKeccakNumberOfRounds = 24, |
@@ -967,8 +977,6 @@ static const uint8_t KeccakF_Mod5[10] = { | |||
967 | static void KeccakF(uint64_t *state) | 977 | static void KeccakF(uint64_t *state) |
968 | { | 978 | { |
969 | uint8_t x, y; | 979 | uint8_t x, y; |
970 | uint64_t temp; | ||
971 | uint64_t BC[5]; | ||
972 | int round; | 980 | int round; |
973 | 981 | ||
974 | if (BB_BIG_ENDIAN) { | 982 | if (BB_BIG_ENDIAN) { |
@@ -979,30 +987,61 @@ static void KeccakF(uint64_t *state) | |||
979 | 987 | ||
980 | for (round = 0; round < cKeccakNumberOfRounds; ++round) { | 988 | for (round = 0; round < cKeccakNumberOfRounds; ++round) { |
981 | /* Theta */ | 989 | /* Theta */ |
982 | for (x = 0; x < 5; ++x) { | 990 | { |
983 | BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^ | 991 | uint64_t BC[5]; |
984 | state[15 + x] ^ state[20 + x]; | 992 | for (x = 0; x < 5; ++x) { |
985 | } | 993 | BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^ |
986 | for (x = 0; x < 5; ++x) { | 994 | state[15 + x] ^ state[20 + x]; |
987 | temp = BC[KeccakF_Mod5[x + 4]] ^ | 995 | } |
988 | rotl64(BC[KeccakF_Mod5[x + 1]], 1); | 996 | for (x = 0; x < 5; ++x) { |
989 | 997 | uint64_t temp = BC[KeccakF_Mod5[x + 4]] ^ | |
990 | for (y = 0; y <= 20; y += 5) { | 998 | rotl64(BC[KeccakF_Mod5[x + 1]], 1); |
991 | state[y + x] ^= temp; | 999 | if (SHA3_SMALL && !ARCH_IS_64BIT) { |
1000 | for (y = 0; y <= 20; y += 5) | ||
1001 | state[y + x] ^= temp; | ||
1002 | } else { | ||
1003 | /* on 64-bit arch, this is actually smaller too */ | ||
1004 | state[0 + x] ^= temp; | ||
1005 | state[5 + x] ^= temp; | ||
1006 | state[10 + x] ^= temp; | ||
1007 | state[15 + x] ^= temp; | ||
1008 | state[20 + x] ^= temp; | ||
1009 | } | ||
992 | } | 1010 | } |
993 | } | 1011 | } |
994 | 1012 | ||
995 | /* Rho Pi */ | 1013 | /* Rho Pi */ |
996 | temp = state[1]; | 1014 | if (SHA3_SMALL) { |
997 | for (x = 0; x < 24; ++x) { | 1015 | uint64_t t1 = state[1]; |
998 | BC[0] = state[KeccakF_PiLane[x]]; | 1016 | for (x = 0; x < 24; ++x) { |
999 | state[KeccakF_PiLane[x]] = | 1017 | uint64_t t0 = state[KeccakF_PiLane[x]]; |
1000 | rotl64(temp, KeccakF_RotationConstants[x]); | 1018 | state[KeccakF_PiLane[x]] = rotl64(t1, KeccakF_RotationConstants[x]); |
1001 | temp = BC[0]; | 1019 | t1 = t0; |
1020 | } | ||
1021 | } else { | ||
1022 | /* Especially large benefit for 32-bit arch: | ||
1023 | * 64-bit rotations by non-constant usually are SLOW on those. | ||
1024 | * We resort to unrolling here. | ||
1025 | * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[], | ||
1026 | * but generates 300-500 more bytes of code. | ||
1027 | */ | ||
1028 | uint64_t t0; | ||
1029 | uint64_t t1 = state[1]; | ||
1030 | #define RhoPi_twice(x) \ | ||
1031 | t0 = state[KeccakF_PiLane[x ]]; state[KeccakF_PiLane[x ]] = rotl64(t1, KeccakF_RotationConstants[x ]); \ | ||
1032 | t1 = state[KeccakF_PiLane[x+1]]; state[KeccakF_PiLane[x+1]] = rotl64(t0, KeccakF_RotationConstants[x+1]); | ||
1033 | RhoPi_twice(0); RhoPi_twice(2); | ||
1034 | RhoPi_twice(4); RhoPi_twice(6); | ||
1035 | RhoPi_twice(8); RhoPi_twice(10); | ||
1036 | RhoPi_twice(12); RhoPi_twice(14); | ||
1037 | RhoPi_twice(16); RhoPi_twice(18); | ||
1038 | RhoPi_twice(20); RhoPi_twice(22); | ||
1039 | #undef RhoPi_twice | ||
1002 | } | 1040 | } |
1003 | 1041 | ||
1004 | /* Chi */ | 1042 | /* Chi */ |
1005 | for (y = 0; y < 25; y += 5) { | 1043 | for (y = 0; y <= 20; y += 5) { |
1044 | uint64_t BC[5]; | ||
1006 | BC[0] = state[y + 0]; | 1045 | BC[0] = state[y + 0]; |
1007 | BC[1] = state[y + 1]; | 1046 | BC[1] = state[y + 1]; |
1008 | BC[2] = state[y + 2]; | 1047 | BC[2] = state[y + 2]; |