diff options
-rw-r--r-- | libbb/hash_md5_sha.c | 82 |
1 file changed, 66 insertions, 16 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index dff583ad1..4cef2aba1 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c | |||
@@ -1141,22 +1141,30 @@ static void sha3_process_block72(uint64_t *state) | |||
1141 | #undef RhoPi | 1141 | #undef RhoPi |
1142 | } | 1142 | } |
1143 | /* Chi */ | 1143 | /* Chi */ |
1144 | for (x = 0; x <= 20; x += 5) { | 1144 | for (x = 0; x <= 40;) { |
1145 | /* | 1145 | uint32_t BC0, BC1, BC2, BC3, BC4; |
1146 | * Can write this in terms of uint32 too, | 1146 | BC0 = s32[x + 0*2]; |
1147 | * but why? compiler does it automatically. | 1147 | BC1 = s32[x + 1*2]; |
1148 | */ | 1148 | BC2 = s32[x + 2*2]; |
1149 | uint64_t BC0, BC1, BC2, BC3, BC4; | 1149 | s32[x + 0*2] = BC0 ^ ((~BC1) & BC2); |
1150 | BC0 = state[x + 0]; | 1150 | BC3 = s32[x + 3*2]; |
1151 | BC1 = state[x + 1]; | 1151 | s32[x + 1*2] = BC1 ^ ((~BC2) & BC3); |
1152 | BC2 = state[x + 2]; | 1152 | BC4 = s32[x + 4*2]; |
1153 | state[x + 0] = BC0 ^ ((~BC1) & BC2); | 1153 | s32[x + 2*2] = BC2 ^ ((~BC3) & BC4); |
1154 | BC3 = state[x + 3]; | 1154 | s32[x + 3*2] = BC3 ^ ((~BC4) & BC0); |
1155 | state[x + 1] = BC1 ^ ((~BC2) & BC3); | 1155 | s32[x + 4*2] = BC4 ^ ((~BC0) & BC1); |
1156 | BC4 = state[x + 4]; | 1156 | x++; |
1157 | state[x + 2] = BC2 ^ ((~BC3) & BC4); | 1157 | BC0 = s32[x + 0*2]; |
1158 | state[x + 3] = BC3 ^ ((~BC4) & BC0); | 1158 | BC1 = s32[x + 1*2]; |
1159 | state[x + 4] = BC4 ^ ((~BC0) & BC1); | 1159 | BC2 = s32[x + 2*2]; |
1160 | s32[x + 0*2] = BC0 ^ ((~BC1) & BC2); | ||
1161 | BC3 = s32[x + 3*2]; | ||
1162 | s32[x + 1*2] = BC1 ^ ((~BC2) & BC3); | ||
1163 | BC4 = s32[x + 4*2]; | ||
1164 | s32[x + 2*2] = BC2 ^ ((~BC3) & BC4); | ||
1165 | s32[x + 3*2] = BC3 ^ ((~BC4) & BC0); | ||
1166 | s32[x + 4*2] = BC4 ^ ((~BC0) & BC1); | ||
1167 | x += 9; | ||
1160 | } | 1168 | } |
1161 | /* Iota */ | 1169 | /* Iota */ |
1162 | s32[0] ^= IOTA_CONST_0bits & 1; | 1170 | s32[0] ^= IOTA_CONST_0bits & 1; |
@@ -1275,6 +1283,7 @@ static void sha3_process_block72(uint64_t *state) | |||
1275 | #undef RhoPi_twice | 1283 | #undef RhoPi_twice |
1276 | } | 1284 | } |
1277 | /* Chi */ | 1285 | /* Chi */ |
1286 | #if LONG_MAX > 0x7fffffff | ||
1278 | for (x = 0; x <= 20; x += 5) { | 1287 | for (x = 0; x <= 20; x += 5) { |
1279 | uint64_t BC0, BC1, BC2, BC3, BC4; | 1288 | uint64_t BC0, BC1, BC2, BC3, BC4; |
1280 | BC0 = state[x + 0]; | 1289 | BC0 = state[x + 0]; |
@@ -1288,6 +1297,47 @@ static void sha3_process_block72(uint64_t *state) | |||
1288 | state[x + 3] = BC3 ^ ((~BC4) & BC0); | 1297 | state[x + 3] = BC3 ^ ((~BC4) & BC0); |
1289 | state[x + 4] = BC4 ^ ((~BC0) & BC1); | 1298 | state[x + 4] = BC4 ^ ((~BC0) & BC1); |
1290 | } | 1299 | } |
1300 | #else | ||
1301 | /* Reduced register pressure version | ||
1302 | * for register-starved 32-bit arches | ||
1303 | * (i386: -95 bytes, and it is _faster_) | ||
1304 | */ | ||
1305 | for (x = 0; x <= 40;) { | ||
1306 | uint32_t BC0, BC1, BC2, BC3, BC4; | ||
1307 | uint32_t *const s32 = (uint32_t*)state; | ||
1308 | # if SHA3_SMALL | ||
1309 | do_half: | ||
1310 | #endif | ||
1311 | BC0 = s32[x + 0*2]; | ||
1312 | BC1 = s32[x + 1*2]; | ||
1313 | BC2 = s32[x + 2*2]; | ||
1314 | s32[x + 0*2] = BC0 ^ ((~BC1) & BC2); | ||
1315 | BC3 = s32[x + 3*2]; | ||
1316 | s32[x + 1*2] = BC1 ^ ((~BC2) & BC3); | ||
1317 | BC4 = s32[x + 4*2]; | ||
1318 | s32[x + 2*2] = BC2 ^ ((~BC3) & BC4); | ||
1319 | s32[x + 3*2] = BC3 ^ ((~BC4) & BC0); | ||
1320 | s32[x + 4*2] = BC4 ^ ((~BC0) & BC1); | ||
1321 | x++; | ||
1322 | # if SHA3_SMALL | ||
1323 | if (x & 1) | ||
1324 | goto do_half; | ||
1325 | x += 8; | ||
1326 | # else | ||
1327 | BC0 = s32[x + 0*2]; | ||
1328 | BC1 = s32[x + 1*2]; | ||
1329 | BC2 = s32[x + 2*2]; | ||
1330 | s32[x + 0*2] = BC0 ^ ((~BC1) & BC2); | ||
1331 | BC3 = s32[x + 3*2]; | ||
1332 | s32[x + 1*2] = BC1 ^ ((~BC2) & BC3); | ||
1333 | BC4 = s32[x + 4*2]; | ||
1334 | s32[x + 2*2] = BC2 ^ ((~BC3) & BC4); | ||
1335 | s32[x + 3*2] = BC3 ^ ((~BC4) & BC0); | ||
1336 | s32[x + 4*2] = BC4 ^ ((~BC0) & BC1); | ||
1337 | x += 9; | ||
1338 | # endif | ||
1339 | } | ||
1340 | #endif | ||
1291 | /* Iota */ | 1341 | /* Iota */ |
1292 | state[0] ^= IOTA_CONST[round] | 1342 | state[0] ^= IOTA_CONST[round] |
1293 | | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000) | 1343 | | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000) |