author     Denys Vlasenko <vda.linux@googlemail.com>	2013-01-15 01:12:26 +0100
committer  Denys Vlasenko <vda.linux@googlemail.com>	2013-01-15 01:12:26 +0100
commit     30a8652fbf16884490cee4a624f039a9ab587269 (patch)
tree       cf6235392344386a06849e04fd5deb4f364f0a71 /libbb
parent     60cb48ca50fcff24aa6c3927f51e4a508fa118f4 (diff)
sha3: make size/speed optimization decision configurable
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
-rw-r--r--  libbb/Config.src       10
-rw-r--r--  libbb/hash_md5_sha.c   77
2 files changed, 68 insertions(+), 19 deletions(-)
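
Before the diff itself: the commit adds a Kconfig integer option, SHA3_SMALL, and lets the SHA3 code branch on it at compile time. In busybox's build, a Kconfig "int" option is expected to reach C source as a CONFIG_* preprocessor define. The following standalone sketch is not part of the commit (the fallback define and the printf strings are illustrative only); it just shows that mapping:

/* Standalone sketch, not from the commit: a Kconfig "int" option named
 * SHA3_SMALL is assumed to surface to C code as the macro CONFIG_SHA3_SMALL,
 * so the size/speed decision is visible to the preprocessor and compiler.
 */
#include <stdio.h>

#ifndef CONFIG_SHA3_SMALL
# define CONFIG_SHA3_SMALL 1  /* illustrative fallback; the real default comes from Config.src */
#endif

int main(void)
{
	if (CONFIG_SHA3_SMALL)
		printf("configured for size: keep the generic Keccak loops\n");
	else
		printf("configured for speed: unroll the Theta and Rho Pi steps\n");
	return 0;
}

Building the sketch with, e.g., cc -DCONFIG_SHA3_SMALL=0 shows the other path being selected.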
diff --git a/libbb/Config.src b/libbb/Config.src
index ee1b66a45..19021fed1 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -28,6 +28,16 @@ config MD5_SMALL
 	    2                   3.0           5088
 	    3 (smallest)        5.1           4912
 
+config SHA3_SMALL
+	int "SHA3: Trade bytes for speed (0:fast, 1:slow)"
+	default 1
+	range 0 1
+	help
+	  Trade binary size versus speed for the sha3sum algorithm.
+	  SHA3_SMALL=0 compared to SHA3_SMALL=1 (approximate):
+	  64-bit x86: +270 bytes of code, 45% faster
+	  32-bit x86: +450 bytes of code, 75% faster
+
 config FEATURE_FAST_TOP
 	bool "Faster /proc scanning code (+100 bytes)"
 	default y
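
The C side (next file) does not test CONFIG_SHA3_SMALL directly in the hot code: it first clamps the user value into SHA3_SMALL (0 or 1) and defines ARCH_IS_64BIT, so plain if statements on those constants are folded away by the compiler. A minimal sketch of that clamp-and-fold pattern, under the assumption that CONFIG_SHA3_SMALL is supplied by the build as above:

/* Sketch of the pattern used in hash_md5_sha.c below; the puts() strings
 * and the CONFIG_SHA3_SMALL fallback are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#ifndef CONFIG_SHA3_SMALL
# define CONFIG_SHA3_SMALL 1
#endif

/* Clamp any out-of-range value into the supported 0..1 range */
#if CONFIG_SHA3_SMALL < 0
# define SHA3_SMALL 0
#elif CONFIG_SHA3_SMALL > 1
# define SHA3_SMALL 1
#else
# define SHA3_SMALL CONFIG_SHA3_SMALL
#endif

/* The commit's proxy for "64-bit architecture" */
#define ARCH_IS_64BIT (sizeof(long) >= sizeof(uint64_t))

int main(void)
{
	/* Both operands are compile-time constants, so the compiler keeps
	 * only one of these branches in the generated code. */
	if (SHA3_SMALL && !ARCH_IS_64BIT)
		puts("small build on a 32-bit target: generic loops");
	else
		puts("unrolled Theta / Rho Pi variant");
	return 0;
}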
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 06a2400b5..643cf205f 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -918,6 +918,16 @@ void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
  * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
  */
 
+#if CONFIG_SHA3_SMALL < 0
+# define SHA3_SMALL 0
+#elif CONFIG_SHA3_SMALL > 1
+# define SHA3_SMALL 1
+#else
+# define SHA3_SMALL CONFIG_SHA3_SMALL
+#endif
+
+#define ARCH_IS_64BIT (sizeof(long) >= sizeof(uint64_t))
+
 enum {
 	cKeccakR_SizeInBytes = 576 / 8,
 	cKeccakNumberOfRounds = 24,
@@ -967,8 +977,6 @@ static const uint8_t KeccakF_Mod5[10] = {
 static void KeccakF(uint64_t *state)
 {
 	uint8_t x, y;
-	uint64_t temp;
-	uint64_t BC[5];
 	int round;
 
 	if (BB_BIG_ENDIAN) {
@@ -979,30 +987,61 @@ static void KeccakF(uint64_t *state)
 
 	for (round = 0; round < cKeccakNumberOfRounds; ++round) {
 		/* Theta */
-		for (x = 0; x < 5; ++x) {
-			BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^
-				state[15 + x] ^ state[20 + x];
-		}
-		for (x = 0; x < 5; ++x) {
-			temp = BC[KeccakF_Mod5[x + 4]] ^
-				rotl64(BC[KeccakF_Mod5[x + 1]], 1);
-
-			for (y = 0; y <= 20; y += 5) {
-				state[y + x] ^= temp;
+		{
+			uint64_t BC[5];
+			for (x = 0; x < 5; ++x) {
+				BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^
+					state[15 + x] ^ state[20 + x];
+			}
+			for (x = 0; x < 5; ++x) {
+				uint64_t temp = BC[KeccakF_Mod5[x + 4]] ^
+					rotl64(BC[KeccakF_Mod5[x + 1]], 1);
+				if (SHA3_SMALL && !ARCH_IS_64BIT) {
+					for (y = 0; y <= 20; y += 5)
+						state[y + x] ^= temp;
+				} else {
+					/* on 64-bit arch, this is actually smaller too */
+					state[0 + x] ^= temp;
+					state[5 + x] ^= temp;
+					state[10 + x] ^= temp;
+					state[15 + x] ^= temp;
+					state[20 + x] ^= temp;
+				}
 			}
 		}
 
 		/* Rho Pi */
-		temp = state[1];
-		for (x = 0; x < 24; ++x) {
-			BC[0] = state[KeccakF_PiLane[x]];
-			state[KeccakF_PiLane[x]] =
-				rotl64(temp, KeccakF_RotationConstants[x]);
-			temp = BC[0];
+		if (SHA3_SMALL) {
+			uint64_t t1 = state[1];
+			for (x = 0; x < 24; ++x) {
+				uint64_t t0 = state[KeccakF_PiLane[x]];
+				state[KeccakF_PiLane[x]] = rotl64(t1, KeccakF_RotationConstants[x]);
+				t1 = t0;
+			}
+		} else {
+			/* Especially large benefit for 32-bit arch:
+			 * 64-bit rotations by non-constant usually are SLOW on those.
+			 * We resort to unrolling here.
+			 * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[],
+			 * but generates 300-500 more bytes of code.
+			 */
+			uint64_t t0;
+			uint64_t t1 = state[1];
+#define RhoPi_twice(x) \
+	t0 = state[KeccakF_PiLane[x  ]]; state[KeccakF_PiLane[x  ]] = rotl64(t1, KeccakF_RotationConstants[x  ]); \
+	t1 = state[KeccakF_PiLane[x+1]]; state[KeccakF_PiLane[x+1]] = rotl64(t0, KeccakF_RotationConstants[x+1]);
+			RhoPi_twice(0); RhoPi_twice(2);
+			RhoPi_twice(4); RhoPi_twice(6);
+			RhoPi_twice(8); RhoPi_twice(10);
+			RhoPi_twice(12); RhoPi_twice(14);
+			RhoPi_twice(16); RhoPi_twice(18);
+			RhoPi_twice(20); RhoPi_twice(22);
+#undef RhoPi_twice
 		}
 
 		/* Chi */
-		for (y = 0; y < 25; y += 5) {
+		for (y = 0; y <= 20; y += 5) {
+			uint64_t BC[5];
 			BC[0] = state[y + 0];
 			BC[1] = state[y + 1];
 			BC[2] = state[y + 2];