about | summary | refs | log | tree | commit | diff
path: root/libbb/hash_md5_sha_x86-32_shaNI.S
diff options
context:
space:
mode:
Diffstat (limited to 'libbb/hash_md5_sha_x86-32_shaNI.S')
-rw-r--r-- libbb/hash_md5_sha_x86-32_shaNI.S 37
1 files changed, 17 insertions, 20 deletions
diff --git a/libbb/hash_md5_sha_x86-32_shaNI.S b/libbb/hash_md5_sha_x86-32_shaNI.S
index 5d082ebfb..0f3fe57ca 100644
--- a/libbb/hash_md5_sha_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha_x86-32_shaNI.S
@@ -32,14 +32,10 @@
32#define MSG1 %xmm4 32#define MSG1 %xmm4
33#define MSG2 %xmm5 33#define MSG2 %xmm5
34#define MSG3 %xmm6 34#define MSG3 %xmm6
35#define SHUF_MASK %xmm7
36 35
37 .balign 8 # allow decoders to fetch at least 3 first insns 36 .balign 8 # allow decoders to fetch at least 2 first insns
38sha1_process_block64_shaNI: 37sha1_process_block64_shaNI:
39 pushl %ebp 38 subl $16, %esp
40 movl %esp, %ebp
41 subl $32, %esp
42 andl $~0xF, %esp # paddd needs aligned memory operand
43 39
44 /* load initial hash values */ 40 /* load initial hash values */
45 xor128 E0, E0 41 xor128 E0, E0
@@ -47,30 +43,33 @@ sha1_process_block64_shaNI:
47 pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word 43 pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word
48 shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD 44 shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD
49 45
50 mova128 PSHUFFLE_BYTE_FLIP_MASK, SHUF_MASK 46 mova128 PSHUFFLE_BYTE_FLIP_MASK, %xmm7
47
48 movu128 0*16(%eax), MSG0
49 pshufb %xmm7, MSG0
50 movu128 1*16(%eax), MSG1
51 pshufb %xmm7, MSG1
52 movu128 2*16(%eax), MSG2
53 pshufb %xmm7, MSG2
54 movu128 3*16(%eax), MSG3
55 pshufb %xmm7, MSG3
51 56
52 /* Save hash values for addition after rounds */ 57 /* Save hash values for addition after rounds */
53 movu128 E0, 16(%esp) 58 movu128 E0, %xmm7
54 movu128 ABCD, (%esp) 59 movu128 ABCD, (%esp)
55 60
56 /* Rounds 0-3 */ 61 /* Rounds 0-3 */
57 movu128 0*16(%eax), MSG0
58 pshufb SHUF_MASK, MSG0
59 paddd MSG0, E0 62 paddd MSG0, E0
60 mova128 ABCD, E1 63 mova128 ABCD, E1
61 sha1rnds4 $0, E0, ABCD 64 sha1rnds4 $0, E0, ABCD
62 65
63 /* Rounds 4-7 */ 66 /* Rounds 4-7 */
64 movu128 1*16(%eax), MSG1
65 pshufb SHUF_MASK, MSG1
66 sha1nexte MSG1, E1 67 sha1nexte MSG1, E1
67 mova128 ABCD, E0 68 mova128 ABCD, E0
68 sha1rnds4 $0, E1, ABCD 69 sha1rnds4 $0, E1, ABCD
69 sha1msg1 MSG1, MSG0 70 sha1msg1 MSG1, MSG0
70 71
71 /* Rounds 8-11 */ 72 /* Rounds 8-11 */
72 movu128 2*16(%eax), MSG2
73 pshufb SHUF_MASK, MSG2
74 sha1nexte MSG2, E0 73 sha1nexte MSG2, E0
75 mova128 ABCD, E1 74 mova128 ABCD, E1
76 sha1rnds4 $0, E0, ABCD 75 sha1rnds4 $0, E0, ABCD
@@ -78,8 +77,6 @@ sha1_process_block64_shaNI:
78 xor128 MSG2, MSG0 77 xor128 MSG2, MSG0
79 78
80 /* Rounds 12-15 */ 79 /* Rounds 12-15 */
81 movu128 3*16(%eax), MSG3
82 pshufb SHUF_MASK, MSG3
83 sha1nexte MSG3, E1 80 sha1nexte MSG3, E1
84 mova128 ABCD, E0 81 mova128 ABCD, E0
85 sha1msg2 MSG3, MSG0 82 sha1msg2 MSG3, MSG0
@@ -210,16 +207,16 @@ sha1_process_block64_shaNI:
210 sha1rnds4 $3, E1, ABCD 207 sha1rnds4 $3, E1, ABCD
211 208
212 /* Add current hash values with previously saved */ 209 /* Add current hash values with previously saved */
213 sha1nexte 16(%esp), E0 210 sha1nexte %xmm7, E0
214 paddd (%esp), ABCD 211 movu128 (%esp), %xmm7
212 paddd %xmm7, ABCD
215 213
216 /* Write hash values back in the correct order */ 214 /* Write hash values back in the correct order */
217 shuf128_32 $0x1B, ABCD, ABCD 215 shuf128_32 $0x1B, ABCD, ABCD
218 movu128 ABCD, 76(%eax) 216 movu128 ABCD, 76(%eax)
219 extr128_32 $3, E0, 76+4*4(%eax) 217 extr128_32 $3, E0, 76+4*4(%eax)
220 218
221 movl %ebp, %esp 219 addl $16, %esp
222 popl %ebp
223 ret 220 ret
224 .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI 221 .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI
225 222