diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-16 02:20:31 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2013-01-16 02:20:31 +0100 |
commit | 5368fe541c023130843cea361d779addb936b95c (patch) | |
tree | e358717dcc6de60ff07ec7e2137b005ad1ebd595 | |
parent | 406ea15e2c108c949f8b1a3547eecf4dd9509724 (diff) | |
download | busybox-w32-5368fe541c023130843cea361d779addb936b95c.tar.gz busybox-w32-5368fe541c023130843cea361d779addb936b95c.tar.bz2 busybox-w32-5368fe541c023130843cea361d779addb936b95c.zip |
sha3: rename KeccakF->sha3_process_block76.
This brings the naming more in line with other hashes.
Pulled most statics and constants into it.
Also noticed that two byte arrays (ROT_CONST[] and PI_LANE[]) were declared 1 element too big: 25 elements for 24 initializers. Shrunk them to 24.
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/hash_md5_sha.c | 160 |
1 file changed, 81 insertions, 79 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index d143fc651..15588dcfe 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c | |||
@@ -190,10 +190,9 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) | |||
190 | int i; | 190 | int i; |
191 | uint32_t temp; | 191 | uint32_t temp; |
192 | 192 | ||
193 | # if BB_BIG_ENDIAN | 193 | if (BB_BIG_ENDIAN) |
194 | for (i = 0; i < 16; i++) | 194 | for (i = 0; i < 16; i++) |
195 | words[i] = SWAP_LE32(words[i]); | 195 | words[i] = SWAP_LE32(words[i]); |
196 | # endif | ||
197 | 196 | ||
198 | # if MD5_SMALL == 3 | 197 | # if MD5_SMALL == 3 |
199 | pc = C_array; | 198 | pc = C_array; |
@@ -467,12 +466,13 @@ void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf) | |||
467 | common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN); | 466 | common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN); |
468 | 467 | ||
469 | /* The MD5 result is in little endian byte order */ | 468 | /* The MD5 result is in little endian byte order */ |
470 | #if BB_BIG_ENDIAN | 469 | if (BB_BIG_ENDIAN) { |
471 | ctx->hash[0] = SWAP_LE32(ctx->hash[0]); | 470 | ctx->hash[0] = SWAP_LE32(ctx->hash[0]); |
472 | ctx->hash[1] = SWAP_LE32(ctx->hash[1]); | 471 | ctx->hash[1] = SWAP_LE32(ctx->hash[1]); |
473 | ctx->hash[2] = SWAP_LE32(ctx->hash[2]); | 472 | ctx->hash[2] = SWAP_LE32(ctx->hash[2]); |
474 | ctx->hash[3] = SWAP_LE32(ctx->hash[3]); | 473 | ctx->hash[3] = SWAP_LE32(ctx->hash[3]); |
475 | #endif | 474 | } |
475 | |||
476 | memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4); | 476 | memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4); |
477 | } | 477 | } |
478 | 478 | ||
@@ -927,59 +927,61 @@ void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf) | |||
927 | #endif | 927 | #endif |
928 | 928 | ||
929 | enum { | 929 | enum { |
930 | KECCAK_IBLK_BYTES = 576 / 8, | 930 | SHA3_IBLK_BYTES = 72, /* 576 bits / 8 */ |
931 | KECCAK_NROUNDS = 24, | ||
932 | }; | 931 | }; |
933 | 932 | ||
934 | /* Elements should be 64-bit, but top half is always zero or 0x80000000. | 933 | /* |
935 | * We encode 63rd bits in a separate word below. | 934 | * In the crypto literature this function is usually called Keccak-f(). |
936 | * Same is true for 31th bits, which lets us use 16-bit table instead of 64-bit. | ||
937 | * The speed penalty is lost in the noise. | ||
938 | */ | 935 | */ |
939 | static const uint16_t KECCAK_IOTA_CONST[KECCAK_NROUNDS] = { | 936 | static void sha3_process_block76(uint64_t *state) |
940 | 0x0001U, | ||
941 | 0x8082U, | ||
942 | 0x808aU, | ||
943 | 0x8000U, | ||
944 | 0x808bU, | ||
945 | 0x0001U, | ||
946 | 0x8081U, | ||
947 | 0x8009U, | ||
948 | 0x008aU, | ||
949 | 0x0088U, | ||
950 | 0x8009U, | ||
951 | 0x000aU, | ||
952 | 0x808bU, | ||
953 | 0x008bU, | ||
954 | 0x8089U, | ||
955 | 0x8003U, | ||
956 | 0x8002U, | ||
957 | 0x0080U, | ||
958 | 0x800aU, | ||
959 | 0x000aU, | ||
960 | 0x8081U, | ||
961 | 0x8080U, | ||
962 | 0x0001U, | ||
963 | 0x8008U, | ||
964 | }; | ||
965 | /* bit from CONST[0] is msb: 0011 0011 0000 0111 1101 1101 */ | ||
966 | #define KECCAK_IOTA_CONST_bit63 ((uint32_t)(0x3307dd00)) | ||
967 | /* bit from CONST[0] is msb: 0001 0110 0011 1000 0001 1011 */ | ||
968 | #define KECCAK_IOTA_CONST_bit31 ((uint32_t)(0x16381b00)) | ||
969 | |||
970 | static const uint8_t KECCAK_ROT_CONST[25] = { | ||
971 | 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25, 43, 62, | ||
972 | 18, 39, 61, 20, 44 | ||
973 | }; | ||
974 | |||
975 | static const uint8_t KECCAK_PI_LANE[25] = { | ||
976 | 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, | ||
977 | 14, 22, 9, 6, 1 | ||
978 | }; | ||
979 | |||
980 | static void KeccakF(uint64_t *state) | ||
981 | { | 937 | { |
982 | /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 };*/ | 938 | enum { NROUNDS = 24 }; |
939 | |||
940 | /* Elements should be 64-bit, but top half is always zero or 0x80000000. | ||
941 | * We encode 63rd bits in a separate word below. | ||
942 | * Same is true for 31th bits, which lets us use 16-bit table instead of 64-bit. | ||
943 | * The speed penalty is lost in the noise. | ||
944 | */ | ||
945 | static const uint16_t IOTA_CONST[NROUNDS] = { | ||
946 | 0x0001, | ||
947 | 0x8082, | ||
948 | 0x808a, | ||
949 | 0x8000, | ||
950 | 0x808b, | ||
951 | 0x0001, | ||
952 | 0x8081, | ||
953 | 0x8009, | ||
954 | 0x008a, | ||
955 | 0x0088, | ||
956 | 0x8009, | ||
957 | 0x000a, | ||
958 | 0x808b, | ||
959 | 0x008b, | ||
960 | 0x8089, | ||
961 | 0x8003, | ||
962 | 0x8002, | ||
963 | 0x0080, | ||
964 | 0x800a, | ||
965 | 0x000a, | ||
966 | 0x8081, | ||
967 | 0x8080, | ||
968 | 0x0001, | ||
969 | 0x8008, | ||
970 | }; | ||
971 | /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */ | ||
972 | const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00); | ||
973 | /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */ | ||
974 | const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00); | ||
975 | |||
976 | static const uint8_t ROT_CONST[24] = { | ||
977 | 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, | ||
978 | 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44, | ||
979 | }; | ||
980 | static const uint8_t PI_LANE[24] = { | ||
981 | 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, | ||
982 | 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1, | ||
983 | }; | ||
984 | /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/ | ||
983 | 985 | ||
984 | unsigned x, y; | 986 | unsigned x, y; |
985 | unsigned round; | 987 | unsigned round; |
@@ -990,7 +992,7 @@ static void KeccakF(uint64_t *state) | |||
990 | } | 992 | } |
991 | } | 993 | } |
992 | 994 | ||
993 | for (round = 0; round < KECCAK_NROUNDS; ++round) { | 995 | for (round = 0; round < NROUNDS; ++round) { |
994 | /* Theta */ | 996 | /* Theta */ |
995 | { | 997 | { |
996 | uint64_t BC[10]; | 998 | uint64_t BC[10]; |
@@ -1017,24 +1019,24 @@ static void KeccakF(uint64_t *state) | |||
1017 | if (SHA3_SMALL) { | 1019 | if (SHA3_SMALL) { |
1018 | uint64_t t1 = state[1]; | 1020 | uint64_t t1 = state[1]; |
1019 | for (x = 0; x < 24; ++x) { | 1021 | for (x = 0; x < 24; ++x) { |
1020 | uint64_t t0 = state[KECCAK_PI_LANE[x]]; | 1022 | uint64_t t0 = state[PI_LANE[x]]; |
1021 | state[KECCAK_PI_LANE[x]] = rotl64(t1, KECCAK_ROT_CONST[x]); | 1023 | state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]); |
1022 | t1 = t0; | 1024 | t1 = t0; |
1023 | } | 1025 | } |
1024 | } else { | 1026 | } else { |
1025 | /* Especially large benefit for 32-bit arch (75% faster): | 1027 | /* Especially large benefit for 32-bit arch (75% faster): |
1026 | * 64-bit rotations by non-constant usually are SLOW on those. | 1028 | * 64-bit rotations by non-constant usually are SLOW on those. |
1027 | * We resort to unrolling here. | 1029 | * We resort to unrolling here. |
1028 | * This optimizes out KECCAK_PI_LANE[] and KECCAK_ROT_CONST[], | 1030 | * This optimizes out PI_LANE[] and ROT_CONST[], |
1029 | * but generates 300-500 more bytes of code. | 1031 | * but generates 300-500 more bytes of code. |
1030 | */ | 1032 | */ |
1031 | uint64_t t0; | 1033 | uint64_t t0; |
1032 | uint64_t t1 = state[1]; | 1034 | uint64_t t1 = state[1]; |
1033 | #define RhoPi_twice(x) \ | 1035 | #define RhoPi_twice(x) \ |
1034 | t0 = state[KECCAK_PI_LANE[x ]]; \ | 1036 | t0 = state[PI_LANE[x ]]; \ |
1035 | state[KECCAK_PI_LANE[x ]] = rotl64(t1, KECCAK_ROT_CONST[x ]); \ | 1037 | state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \ |
1036 | t1 = state[KECCAK_PI_LANE[x+1]]; \ | 1038 | t1 = state[PI_LANE[x+1]]; \ |
1037 | state[KECCAK_PI_LANE[x+1]] = rotl64(t0, KECCAK_ROT_CONST[x+1]); | 1039 | state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]); |
1038 | RhoPi_twice(0); RhoPi_twice(2); | 1040 | RhoPi_twice(0); RhoPi_twice(2); |
1039 | RhoPi_twice(4); RhoPi_twice(6); | 1041 | RhoPi_twice(4); RhoPi_twice(6); |
1040 | RhoPi_twice(8); RhoPi_twice(10); | 1042 | RhoPi_twice(8); RhoPi_twice(10); |
@@ -1060,9 +1062,9 @@ static void KeccakF(uint64_t *state) | |||
1060 | } | 1062 | } |
1061 | 1063 | ||
1062 | /* Iota */ | 1064 | /* Iota */ |
1063 | state[0] ^= KECCAK_IOTA_CONST[round] | 1065 | state[0] ^= IOTA_CONST[round] |
1064 | | (uint32_t)((KECCAK_IOTA_CONST_bit31 << round) & 0x80000000) | 1066 | | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000) |
1065 | | (uint64_t)((KECCAK_IOTA_CONST_bit63 << round) & 0x80000000) << 32; | 1067 | | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32; |
1066 | } | 1068 | } |
1067 | 1069 | ||
1068 | if (BB_BIG_ENDIAN) { | 1070 | if (BB_BIG_ENDIAN) { |
@@ -1088,19 +1090,19 @@ void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buf, size_t bytes) | |||
1088 | buffer[bytes_queued] ^= *data++; | 1090 | buffer[bytes_queued] ^= *data++; |
1089 | bytes--; | 1091 | bytes--; |
1090 | bytes_queued++; | 1092 | bytes_queued++; |
1091 | if (bytes_queued == KECCAK_IBLK_BYTES) { | 1093 | if (bytes_queued == SHA3_IBLK_BYTES) { |
1092 | KeccakF(ctx->state); | 1094 | sha3_process_block76(ctx->state); |
1093 | bytes_queued = 0; | 1095 | bytes_queued = 0; |
1094 | } | 1096 | } |
1095 | } | 1097 | } |
1096 | 1098 | ||
1097 | /* Absorb complete blocks */ | 1099 | /* Absorb complete blocks */ |
1098 | while (bytes >= KECCAK_IBLK_BYTES) { | 1100 | while (bytes >= SHA3_IBLK_BYTES) { |
1099 | /* XOR data onto beginning of state[]. | 1101 | /* XOR data onto beginning of state[]. |
1100 | * We try to be efficient - operate on word at a time, not byte. | 1102 | * We try to be efficient - operate on word at a time, not byte. |
1101 | * Yet safe wrt unaligned access: can't just use "*(long*)data"... | 1103 | * Yet safe wrt unaligned access: can't just use "*(long*)data"... |
1102 | */ | 1104 | */ |
1103 | unsigned count = KECCAK_IBLK_BYTES / sizeof(long); | 1105 | unsigned count = SHA3_IBLK_BYTES / sizeof(long); |
1104 | long *buffer = (long*)ctx->state; | 1106 | long *buffer = (long*)ctx->state; |
1105 | do { | 1107 | do { |
1106 | long v; | 1108 | long v; |
@@ -1109,9 +1111,9 @@ void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buf, size_t bytes) | |||
1109 | data += sizeof(long); | 1111 | data += sizeof(long); |
1110 | } while (--count); | 1112 | } while (--count); |
1111 | 1113 | ||
1112 | KeccakF(ctx->state); | 1114 | sha3_process_block76(ctx->state); |
1113 | 1115 | ||
1114 | bytes -= KECCAK_IBLK_BYTES; | 1116 | bytes -= SHA3_IBLK_BYTES; |
1115 | } | 1117 | } |
1116 | 1118 | ||
1117 | /* Queue remaining data bytes */ | 1119 | /* Queue remaining data bytes */ |
@@ -1129,10 +1131,10 @@ void FAST_FUNC sha3_end(sha3_ctx_t *ctx, uint8_t *hashval) | |||
1129 | { | 1131 | { |
1130 | /* Padding */ | 1132 | /* Padding */ |
1131 | uint8_t *buffer = (uint8_t*)ctx->state; | 1133 | uint8_t *buffer = (uint8_t*)ctx->state; |
1132 | buffer[ctx->bytes_queued] ^= 1; | 1134 | buffer[ctx->bytes_queued] ^= 1; |
1133 | buffer[KECCAK_IBLK_BYTES - 1] ^= 0x80; | 1135 | buffer[SHA3_IBLK_BYTES - 1] ^= 0x80; |
1134 | 1136 | ||
1135 | KeccakF(ctx->state); | 1137 | sha3_process_block76(ctx->state); |
1136 | 1138 | ||
1137 | /* Output */ | 1139 | /* Output */ |
1138 | memcpy(hashval, ctx->state, 64); | 1140 | memcpy(hashval, ctx->state, 64); |