about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2013-01-16 02:20:31 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2013-01-16 02:20:31 +0100
commit: 5368fe541c023130843cea361d779addb936b95c (patch)
tree: e358717dcc6de60ff07ec7e2137b005ad1ebd595
parent: 406ea15e2c108c949f8b1a3547eecf4dd9509724 (diff)
download: busybox-w32-5368fe541c023130843cea361d779addb936b95c.tar.gz
busybox-w32-5368fe541c023130843cea361d779addb936b95c.tar.bz2
busybox-w32-5368fe541c023130843cea361d779addb936b95c.zip
sha3: rename KeccakF->sha3_process_block76.
This brings the naming more in line with other hashes. Pulled most statics and constants into it. Also noticed that two byte arrays are 1 element too big.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- libbb/hash_md5_sha.c 160
1 files changed, 81 insertions, 79 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index d143fc651..15588dcfe 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -190,10 +190,9 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
190 int i; 190 int i;
191 uint32_t temp; 191 uint32_t temp;
192 192
193# if BB_BIG_ENDIAN 193 if (BB_BIG_ENDIAN)
194 for (i = 0; i < 16; i++) 194 for (i = 0; i < 16; i++)
195 words[i] = SWAP_LE32(words[i]); 195 words[i] = SWAP_LE32(words[i]);
196# endif
197 196
198# if MD5_SMALL == 3 197# if MD5_SMALL == 3
199 pc = C_array; 198 pc = C_array;
@@ -467,12 +466,13 @@ void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
467 common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN); 466 common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
468 467
469 /* The MD5 result is in little endian byte order */ 468 /* The MD5 result is in little endian byte order */
470#if BB_BIG_ENDIAN 469 if (BB_BIG_ENDIAN) {
471 ctx->hash[0] = SWAP_LE32(ctx->hash[0]); 470 ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
472 ctx->hash[1] = SWAP_LE32(ctx->hash[1]); 471 ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
473 ctx->hash[2] = SWAP_LE32(ctx->hash[2]); 472 ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
474 ctx->hash[3] = SWAP_LE32(ctx->hash[3]); 473 ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
475#endif 474 }
475
476 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4); 476 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
477} 477}
478 478
@@ -927,59 +927,61 @@ void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
927#endif 927#endif
928 928
929enum { 929enum {
930 KECCAK_IBLK_BYTES = 576 / 8, 930 SHA3_IBLK_BYTES = 72, /* 576 bits / 8 */
931 KECCAK_NROUNDS = 24,
932}; 931};
933 932
934/* Elements should be 64-bit, but top half is always zero or 0x80000000. 933/*
935 * We encode 63rd bits in a separate word below. 934 * In the crypto literature this function is usually called Keccak-f().
936 * Same is true for 31th bits, which lets us use 16-bit table instead of 64-bit.
937 * The speed penalty is lost in the noise.
938 */ 935 */
939static const uint16_t KECCAK_IOTA_CONST[KECCAK_NROUNDS] = { 936static void sha3_process_block76(uint64_t *state)
940 0x0001U,
941 0x8082U,
942 0x808aU,
943 0x8000U,
944 0x808bU,
945 0x0001U,
946 0x8081U,
947 0x8009U,
948 0x008aU,
949 0x0088U,
950 0x8009U,
951 0x000aU,
952 0x808bU,
953 0x008bU,
954 0x8089U,
955 0x8003U,
956 0x8002U,
957 0x0080U,
958 0x800aU,
959 0x000aU,
960 0x8081U,
961 0x8080U,
962 0x0001U,
963 0x8008U,
964};
965/* bit from CONST[0] is msb: 0011 0011 0000 0111 1101 1101 */
966#define KECCAK_IOTA_CONST_bit63 ((uint32_t)(0x3307dd00))
967/* bit from CONST[0] is msb: 0001 0110 0011 1000 0001 1011 */
968#define KECCAK_IOTA_CONST_bit31 ((uint32_t)(0x16381b00))
969
970static const uint8_t KECCAK_ROT_CONST[25] = {
971 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25, 43, 62,
972 18, 39, 61, 20, 44
973};
974
975static const uint8_t KECCAK_PI_LANE[25] = {
976 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20,
977 14, 22, 9, 6, 1
978};
979
980static void KeccakF(uint64_t *state)
981{ 937{
982 /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 };*/ 938 enum { NROUNDS = 24 };
939
940 /* Elements should be 64-bit, but top half is always zero or 0x80000000.
941 * We encode 63rd bits in a separate word below.
942 * Same is true for 31th bits, which lets us use 16-bit table instead of 64-bit.
943 * The speed penalty is lost in the noise.
944 */
945 static const uint16_t IOTA_CONST[NROUNDS] = {
946 0x0001,
947 0x8082,
948 0x808a,
949 0x8000,
950 0x808b,
951 0x0001,
952 0x8081,
953 0x8009,
954 0x008a,
955 0x0088,
956 0x8009,
957 0x000a,
958 0x808b,
959 0x008b,
960 0x8089,
961 0x8003,
962 0x8002,
963 0x0080,
964 0x800a,
965 0x000a,
966 0x8081,
967 0x8080,
968 0x0001,
969 0x8008,
970 };
971 /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
972 const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
973 /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
974 const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
975
976 static const uint8_t ROT_CONST[24] = {
977 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
978 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
979 };
980 static const uint8_t PI_LANE[24] = {
981 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
982 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
983 };
984 /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/
983 985
984 unsigned x, y; 986 unsigned x, y;
985 unsigned round; 987 unsigned round;
@@ -990,7 +992,7 @@ static void KeccakF(uint64_t *state)
990 } 992 }
991 } 993 }
992 994
993 for (round = 0; round < KECCAK_NROUNDS; ++round) { 995 for (round = 0; round < NROUNDS; ++round) {
994 /* Theta */ 996 /* Theta */
995 { 997 {
996 uint64_t BC[10]; 998 uint64_t BC[10];
@@ -1017,24 +1019,24 @@ static void KeccakF(uint64_t *state)
1017 if (SHA3_SMALL) { 1019 if (SHA3_SMALL) {
1018 uint64_t t1 = state[1]; 1020 uint64_t t1 = state[1];
1019 for (x = 0; x < 24; ++x) { 1021 for (x = 0; x < 24; ++x) {
1020 uint64_t t0 = state[KECCAK_PI_LANE[x]]; 1022 uint64_t t0 = state[PI_LANE[x]];
1021 state[KECCAK_PI_LANE[x]] = rotl64(t1, KECCAK_ROT_CONST[x]); 1023 state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
1022 t1 = t0; 1024 t1 = t0;
1023 } 1025 }
1024 } else { 1026 } else {
1025 /* Especially large benefit for 32-bit arch (75% faster): 1027 /* Especially large benefit for 32-bit arch (75% faster):
1026 * 64-bit rotations by non-constant usually are SLOW on those. 1028 * 64-bit rotations by non-constant usually are SLOW on those.
1027 * We resort to unrolling here. 1029 * We resort to unrolling here.
1028 * This optimizes out KECCAK_PI_LANE[] and KECCAK_ROT_CONST[], 1030 * This optimizes out PI_LANE[] and ROT_CONST[],
1029 * but generates 300-500 more bytes of code. 1031 * but generates 300-500 more bytes of code.
1030 */ 1032 */
1031 uint64_t t0; 1033 uint64_t t0;
1032 uint64_t t1 = state[1]; 1034 uint64_t t1 = state[1];
1033#define RhoPi_twice(x) \ 1035#define RhoPi_twice(x) \
1034 t0 = state[KECCAK_PI_LANE[x ]]; \ 1036 t0 = state[PI_LANE[x ]]; \
1035 state[KECCAK_PI_LANE[x ]] = rotl64(t1, KECCAK_ROT_CONST[x ]); \ 1037 state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \
1036 t1 = state[KECCAK_PI_LANE[x+1]]; \ 1038 t1 = state[PI_LANE[x+1]]; \
1037 state[KECCAK_PI_LANE[x+1]] = rotl64(t0, KECCAK_ROT_CONST[x+1]); 1039 state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
1038 RhoPi_twice(0); RhoPi_twice(2); 1040 RhoPi_twice(0); RhoPi_twice(2);
1039 RhoPi_twice(4); RhoPi_twice(6); 1041 RhoPi_twice(4); RhoPi_twice(6);
1040 RhoPi_twice(8); RhoPi_twice(10); 1042 RhoPi_twice(8); RhoPi_twice(10);
@@ -1060,9 +1062,9 @@ static void KeccakF(uint64_t *state)
1060 } 1062 }
1061 1063
1062 /* Iota */ 1064 /* Iota */
1063 state[0] ^= KECCAK_IOTA_CONST[round] 1065 state[0] ^= IOTA_CONST[round]
1064 | (uint32_t)((KECCAK_IOTA_CONST_bit31 << round) & 0x80000000) 1066 | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
1065 | (uint64_t)((KECCAK_IOTA_CONST_bit63 << round) & 0x80000000) << 32; 1067 | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
1066 } 1068 }
1067 1069
1068 if (BB_BIG_ENDIAN) { 1070 if (BB_BIG_ENDIAN) {
@@ -1088,19 +1090,19 @@ void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buf, size_t bytes)
1088 buffer[bytes_queued] ^= *data++; 1090 buffer[bytes_queued] ^= *data++;
1089 bytes--; 1091 bytes--;
1090 bytes_queued++; 1092 bytes_queued++;
1091 if (bytes_queued == KECCAK_IBLK_BYTES) { 1093 if (bytes_queued == SHA3_IBLK_BYTES) {
1092 KeccakF(ctx->state); 1094 sha3_process_block76(ctx->state);
1093 bytes_queued = 0; 1095 bytes_queued = 0;
1094 } 1096 }
1095 } 1097 }
1096 1098
1097 /* Absorb complete blocks */ 1099 /* Absorb complete blocks */
1098 while (bytes >= KECCAK_IBLK_BYTES) { 1100 while (bytes >= SHA3_IBLK_BYTES) {
1099 /* XOR data onto beginning of state[]. 1101 /* XOR data onto beginning of state[].
1100 * We try to be efficient - operate on word at a time, not byte. 1102 * We try to be efficient - operate on word at a time, not byte.
1101 * Yet safe wrt unaligned access: can't just use "*(long*)data"... 1103 * Yet safe wrt unaligned access: can't just use "*(long*)data"...
1102 */ 1104 */
1103 unsigned count = KECCAK_IBLK_BYTES / sizeof(long); 1105 unsigned count = SHA3_IBLK_BYTES / sizeof(long);
1104 long *buffer = (long*)ctx->state; 1106 long *buffer = (long*)ctx->state;
1105 do { 1107 do {
1106 long v; 1108 long v;
@@ -1109,9 +1111,9 @@ void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buf, size_t bytes)
1109 data += sizeof(long); 1111 data += sizeof(long);
1110 } while (--count); 1112 } while (--count);
1111 1113
1112 KeccakF(ctx->state); 1114 sha3_process_block76(ctx->state);
1113 1115
1114 bytes -= KECCAK_IBLK_BYTES; 1116 bytes -= SHA3_IBLK_BYTES;
1115 } 1117 }
1116 1118
1117 /* Queue remaining data bytes */ 1119 /* Queue remaining data bytes */
@@ -1129,10 +1131,10 @@ void FAST_FUNC sha3_end(sha3_ctx_t *ctx, uint8_t *hashval)
1129{ 1131{
1130 /* Padding */ 1132 /* Padding */
1131 uint8_t *buffer = (uint8_t*)ctx->state; 1133 uint8_t *buffer = (uint8_t*)ctx->state;
1132 buffer[ctx->bytes_queued] ^= 1; 1134 buffer[ctx->bytes_queued] ^= 1;
1133 buffer[KECCAK_IBLK_BYTES - 1] ^= 0x80; 1135 buffer[SHA3_IBLK_BYTES - 1] ^= 0x80;
1134 1136
1135 KeccakF(ctx->state); 1137 sha3_process_block76(ctx->state);
1136 1138
1137 /* Output */ 1139 /* Output */
1138 memcpy(hashval, ctx->state, 64); 1140 memcpy(hashval, ctx->state, 64);