-rw-r--r--	libbb/hash_md5_sha_x86-64.S	|  8
-rwxr-xr-x	libbb/hash_md5_sha_x86-64.S.sh	| 14
2 files changed, 11 insertions, 11 deletions
diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S
index f0daa30f6..1d55b91f8 100644
--- a/libbb/hash_md5_sha_x86-64.S
+++ b/libbb/hash_md5_sha_x86-64.S
@@ -71,8 +71,8 @@ sha1_process_block64:
	movq	4*10(%rdi), %r12
	bswapq	%r11
	bswapq	%r12
	rolq	$32, %r11	# r11 = W[9]:W[8]
	rolq	$32, %r12	# r12 = W[11]:W[10]
	movq	%r11, %xmm2
	movq	%r12, %xmm4
	punpcklqdq %xmm4, %xmm2	# xmm2 = r12:r11 = (W[8],W[9],W[10],W[11])
@@ -81,8 +81,8 @@ sha1_process_block64:
	movq	4*14(%rdi), %r14
	bswapq	%r13
	bswapq	%r14
	rolq	$32, %r13	# r13 = W[13]:W[12]
	rolq	$32, %r14	# r14 = W[15]:W[14]
	movq	%r13, %xmm3
	movq	%r14, %xmm4
	punpcklqdq %xmm4, %xmm3	# xmm3 = r14:r13 = (W[12],W[13],W[14],W[15])
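The load sequence above relies on a small trick: a 64-bit load picks up two adjacent big-endian message words, bswapq byte-reverses the whole quadword, and rolq $32 then restores the word order, leaving W[n] in the low half and W[n+1] in the high half, exactly as the "r11 = W[9]:W[8]" comment claims. A minimal C sketch checking that equivalence (not part of the patch; the example values and the simulated little-endian load are made up for illustration):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Two big-endian 32-bit message words, e.g. W[8] and W[9] (example values) */
	uint32_t w8 = 0x01020304, w9 = 0x05060708;

	/* What a little-endian 64-bit load of the byte stream W[8],W[9] returns:
	   each word comes in byte-reversed, W[9] lands in the high half */
	uint64_t loaded = ((uint64_t)__builtin_bswap32(w9) << 32) | __builtin_bswap32(w8);

	uint64_t r = __builtin_bswap64(loaded);	/* bswapq %r11 */
	r = (r << 32) | (r >> 32);		/* rolq $32, %r11 */

	/* Expect "W[9]:W[8]": W[8] in the low 32 bits, W[9] in the high 32 bits */
	printf("%s\n", ((uint32_t)r == w8 && (uint32_t)(r >> 32) == w9) ? "ok" : "mismatch");
	return 0;
}

punpcklqdq then glues two such quadwords into one xmm register, giving (W[8],W[9],W[10],W[11]) as the diff's comments state.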
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh
index 57e77b118..40c979d35 100755
--- a/libbb/hash_md5_sha_x86-64.S.sh
+++ b/libbb/hash_md5_sha_x86-64.S.sh
@@ -99,7 +99,7 @@ INTERLEAVE() {
	)
}

# movaps bswap32_mask(%rip), $xmmT1
# Load W[] to xmm0..3, byteswapping on the fly.
# For iterations 0..15, we pass RCONST+W[] in rsi,r8..r14
# for use in RD1As instead of spilling them to stack.
@@ -110,8 +110,8 @@ INTERLEAVE() {
# movaps %xmm0, $xmmT2
# paddd $xmmRCONST, $xmmT2
# movq $xmmT2, %rsi
# #pextrq \$1, $xmmT2, %r8	#SSE4.1 insn
# #movhpd $xmmT2, %r8	#can only move to mem, not to reg
# shufps \$0x0e, $xmmT2, $xmmT2	# have to use two-insn sequence
# movq $xmmT2, %r8	# instead
# ...
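The commented-out variant above needs the high 64 bits of an xmm register in a general-purpose register, and without SSE4.1 (pextrq) that takes the two-instruction shuffle-then-movq sequence the comment describes. A minimal C intrinsics sketch of the same idea (not part of the patch; it uses pshufd via _mm_shuffle_epi32 rather than the comment's shufps, but both simply move the high dwords into the low qword before the movq):

#include <stdint.h>
#include <stdio.h>
#include <emmintrin.h>	/* SSE2 only, no SSE4.1 needed */

/* Read the high 64 bits of v into a GPR: shuffle dwords 2,3 down, then movq */
static uint64_t high_qword_sse2(__m128i v)
{
	return (uint64_t)_mm_cvtsi128_si64(_mm_shuffle_epi32(v, 0x0e));
}

int main(void)
{
	__m128i v = _mm_set_epi64x((int64_t)0x1122334455667788ULL,	/* high qword */
	                           (int64_t)0x99aabbccddeeff00ULL);	/* low qword  */
	printf("%016llx\n", (unsigned long long)high_qword_sse2(v));	/* 1122334455667788 */
	return 0;
}

With SSE4.1 the same read would be a single pextrq $1 (_mm_extract_epi64(v, 1)), which is exactly the one-instruction alternative the comment rules out.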
@@ -197,8 +197,8 @@ sha1_process_block64:
	movq	4*10(%rdi), %r12
	bswapq	%r11
	bswapq	%r12
	rolq	\$32, %r11	# r11 = W[9]:W[8]
	rolq	\$32, %r12	# r12 = W[11]:W[10]
	movq	%r11, %xmm2
	movq	%r12, $xmmT1
	punpcklqdq $xmmT1, %xmm2	# xmm2 = r12:r11 = (W[8],W[9],W[10],W[11])
@@ -207,8 +207,8 @@ sha1_process_block64:
	movq	4*14(%rdi), %r14
	bswapq	%r13
	bswapq	%r14
	rolq	\$32, %r13	# r13 = W[13]:W[12]
	rolq	\$32, %r14	# r14 = W[15]:W[14]
	movq	%r13, %xmm3
	movq	%r14, $xmmT1
	punpcklqdq $xmmT1, %xmm3	# xmm3 = r14:r13 = (W[12],W[13],W[14],W[15])