about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- libbb/hash_md5_sha.c 82
1 files changed, 66 insertions, 16 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index dff583ad1..4cef2aba1 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -1141,22 +1141,30 @@ static void sha3_process_block72(uint64_t *state)
1141#undef RhoPi 1141#undef RhoPi
1142 } 1142 }
1143 /* Chi */ 1143 /* Chi */
1144 for (x = 0; x <= 20; x += 5) { 1144 for (x = 0; x <= 40;) {
1145 /* 1145 uint32_t BC0, BC1, BC2, BC3, BC4;
1146 * Can write this in terms of uint32 too, 1146 BC0 = s32[x + 0*2];
1147 * but why? compiler does it automatically. 1147 BC1 = s32[x + 1*2];
1148 */ 1148 BC2 = s32[x + 2*2];
1149 uint64_t BC0, BC1, BC2, BC3, BC4; 1149 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1150 BC0 = state[x + 0]; 1150 BC3 = s32[x + 3*2];
1151 BC1 = state[x + 1]; 1151 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1152 BC2 = state[x + 2]; 1152 BC4 = s32[x + 4*2];
1153 state[x + 0] = BC0 ^ ((~BC1) & BC2); 1153 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1154 BC3 = state[x + 3]; 1154 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1155 state[x + 1] = BC1 ^ ((~BC2) & BC3); 1155 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1156 BC4 = state[x + 4]; 1156 x++;
1157 state[x + 2] = BC2 ^ ((~BC3) & BC4); 1157 BC0 = s32[x + 0*2];
1158 state[x + 3] = BC3 ^ ((~BC4) & BC0); 1158 BC1 = s32[x + 1*2];
1159 state[x + 4] = BC4 ^ ((~BC0) & BC1); 1159 BC2 = s32[x + 2*2];
1160 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1161 BC3 = s32[x + 3*2];
1162 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1163 BC4 = s32[x + 4*2];
1164 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1165 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1166 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1167 x += 9;
1160 } 1168 }
1161 /* Iota */ 1169 /* Iota */
1162 s32[0] ^= IOTA_CONST_0bits & 1; 1170 s32[0] ^= IOTA_CONST_0bits & 1;
@@ -1275,6 +1283,7 @@ static void sha3_process_block72(uint64_t *state)
1275#undef RhoPi_twice 1283#undef RhoPi_twice
1276 } 1284 }
1277 /* Chi */ 1285 /* Chi */
1286#if LONG_MAX > 0x7fffffff
1278 for (x = 0; x <= 20; x += 5) { 1287 for (x = 0; x <= 20; x += 5) {
1279 uint64_t BC0, BC1, BC2, BC3, BC4; 1288 uint64_t BC0, BC1, BC2, BC3, BC4;
1280 BC0 = state[x + 0]; 1289 BC0 = state[x + 0];
@@ -1288,6 +1297,47 @@ static void sha3_process_block72(uint64_t *state)
1288 state[x + 3] = BC3 ^ ((~BC4) & BC0); 1297 state[x + 3] = BC3 ^ ((~BC4) & BC0);
1289 state[x + 4] = BC4 ^ ((~BC0) & BC1); 1298 state[x + 4] = BC4 ^ ((~BC0) & BC1);
1290 } 1299 }
1300#else
1301 /* Reduced register pressure version
1302 * for register-starved 32-bit arches
1303 * (i386: -95 bytes, and it is _faster_)
1304 */
1305 for (x = 0; x <= 40;) {
1306 uint32_t BC0, BC1, BC2, BC3, BC4;
1307 uint32_t *const s32 = (uint32_t*)state;
1308# if SHA3_SMALL
1309 do_half:
1310#endif
1311 BC0 = s32[x + 0*2];
1312 BC1 = s32[x + 1*2];
1313 BC2 = s32[x + 2*2];
1314 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1315 BC3 = s32[x + 3*2];
1316 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1317 BC4 = s32[x + 4*2];
1318 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1319 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1320 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1321 x++;
1322# if SHA3_SMALL
1323 if (x & 1)
1324 goto do_half;
1325 x += 8;
1326# else
1327 BC0 = s32[x + 0*2];
1328 BC1 = s32[x + 1*2];
1329 BC2 = s32[x + 2*2];
1330 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1331 BC3 = s32[x + 3*2];
1332 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1333 BC4 = s32[x + 4*2];
1334 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1335 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1336 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1337 x += 9;
1338# endif
1339 }
1340#endif
1291 /* Iota */ 1341 /* Iota */
1292 state[0] ^= IOTA_CONST[round] 1342 state[0] ^= IOTA_CONST[round]
1293 | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000) 1343 | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)