diff options
Diffstat (limited to 'libbb/hash_md5_sha_x86-64.S.sh')
-rwxr-xr-x | libbb/hash_md5_sha_x86-64.S.sh | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh
index 656fb5414..fb1e4b57e 100755
--- a/libbb/hash_md5_sha_x86-64.S.sh
+++ b/libbb/hash_md5_sha_x86-64.S.sh
@@ -203,8 +203,13 @@ echo "# PREP $@ | |||
203 | movaps $xmmW12, $xmmT1 | 203 | movaps $xmmW12, $xmmT1 |
204 | psrldq \$4, $xmmT1 # rshift by 4 bytes: T1 = ([13],[14],[15],0) | 204 | psrldq \$4, $xmmT1 # rshift by 4 bytes: T1 = ([13],[14],[15],0) |
205 | 205 | ||
206 | pshufd \$0x4e, $xmmW0, $xmmT2 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x) | 206 | # pshufd \$0x4e, $xmmW0, $xmmT2 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x) |
207 | punpcklqdq $xmmW4, $xmmT2 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5]) | 207 | # punpcklqdq $xmmW4, $xmmT2 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5]) |
208 | # same result as above, but shorter and faster: | ||
209 | # pshufd/shufps are subtly different: pshufd takes all dwords from source operand, | ||
210 | # shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one! | ||
211 | movaps $xmmW0, $xmmT2 | ||
212 | shufps \$0x4e, $xmmW4, $xmmT2 # 01001110=(T2.dw[2], T2.dw[3], W4.dw[0], W4.dw[1]) = ([2],[3],[4],[5]) | ||
208 | 213 | ||
209 | xorps $xmmW8, $xmmW0 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3]) | 214 | xorps $xmmW8, $xmmW0 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3]) |
210 | xorps $xmmT1, $xmmT2 # ([13],[14],[15],0) ^ ([2],[3],[4],[5]) | 215 | xorps $xmmT1, $xmmT2 # ([13],[14],[15],0) ^ ([2],[3],[4],[5]) |