diff options
Diffstat (limited to 'libbb/hash_md5_sha_x86-32_shaNI.S')
-rw-r--r-- | libbb/hash_md5_sha_x86-32_shaNI.S | 37 |
1 file changed, 17 insertions, 20 deletions
diff --git a/libbb/hash_md5_sha_x86-32_shaNI.S b/libbb/hash_md5_sha_x86-32_shaNI.S
index 5d082ebfb..0f3fe57ca 100644
--- a/libbb/hash_md5_sha_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha_x86-32_shaNI.S
@@ -32,14 +32,10 @@ | |||
32 | #define MSG1 %xmm4 | 32 | #define MSG1 %xmm4 |
33 | #define MSG2 %xmm5 | 33 | #define MSG2 %xmm5 |
34 | #define MSG3 %xmm6 | 34 | #define MSG3 %xmm6 |
35 | #define SHUF_MASK %xmm7 | ||
36 | 35 | ||
37 | .balign 8 # allow decoders to fetch at least 3 first insns | 36 | .balign 8 # allow decoders to fetch at least 2 first insns |
38 | sha1_process_block64_shaNI: | 37 | sha1_process_block64_shaNI: |
39 | pushl %ebp | 38 | subl $16, %esp |
40 | movl %esp, %ebp | ||
41 | subl $32, %esp | ||
42 | andl $~0xF, %esp # paddd needs aligned memory operand | ||
43 | 39 | ||
44 | /* load initial hash values */ | 40 | /* load initial hash values */ |
45 | xor128 E0, E0 | 41 | xor128 E0, E0 |
@@ -47,30 +43,33 @@ sha1_process_block64_shaNI: | |||
47 | pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word | 43 | pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word |
48 | shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD | 44 | shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD |
49 | 45 | ||
50 | mova128 PSHUFFLE_BYTE_FLIP_MASK, SHUF_MASK | 46 | mova128 PSHUFFLE_BYTE_FLIP_MASK, %xmm7 |
47 | |||
48 | movu128 0*16(%eax), MSG0 | ||
49 | pshufb %xmm7, MSG0 | ||
50 | movu128 1*16(%eax), MSG1 | ||
51 | pshufb %xmm7, MSG1 | ||
52 | movu128 2*16(%eax), MSG2 | ||
53 | pshufb %xmm7, MSG2 | ||
54 | movu128 3*16(%eax), MSG3 | ||
55 | pshufb %xmm7, MSG3 | ||
51 | 56 | ||
52 | /* Save hash values for addition after rounds */ | 57 | /* Save hash values for addition after rounds */ |
53 | movu128 E0, 16(%esp) | 58 | movu128 E0, %xmm7 |
54 | movu128 ABCD, (%esp) | 59 | movu128 ABCD, (%esp) |
55 | 60 | ||
56 | /* Rounds 0-3 */ | 61 | /* Rounds 0-3 */ |
57 | movu128 0*16(%eax), MSG0 | ||
58 | pshufb SHUF_MASK, MSG0 | ||
59 | paddd MSG0, E0 | 62 | paddd MSG0, E0 |
60 | mova128 ABCD, E1 | 63 | mova128 ABCD, E1 |
61 | sha1rnds4 $0, E0, ABCD | 64 | sha1rnds4 $0, E0, ABCD |
62 | 65 | ||
63 | /* Rounds 4-7 */ | 66 | /* Rounds 4-7 */ |
64 | movu128 1*16(%eax), MSG1 | ||
65 | pshufb SHUF_MASK, MSG1 | ||
66 | sha1nexte MSG1, E1 | 67 | sha1nexte MSG1, E1 |
67 | mova128 ABCD, E0 | 68 | mova128 ABCD, E0 |
68 | sha1rnds4 $0, E1, ABCD | 69 | sha1rnds4 $0, E1, ABCD |
69 | sha1msg1 MSG1, MSG0 | 70 | sha1msg1 MSG1, MSG0 |
70 | 71 | ||
71 | /* Rounds 8-11 */ | 72 | /* Rounds 8-11 */ |
72 | movu128 2*16(%eax), MSG2 | ||
73 | pshufb SHUF_MASK, MSG2 | ||
74 | sha1nexte MSG2, E0 | 73 | sha1nexte MSG2, E0 |
75 | mova128 ABCD, E1 | 74 | mova128 ABCD, E1 |
76 | sha1rnds4 $0, E0, ABCD | 75 | sha1rnds4 $0, E0, ABCD |
@@ -78,8 +77,6 @@ sha1_process_block64_shaNI: | |||
78 | xor128 MSG2, MSG0 | 77 | xor128 MSG2, MSG0 |
79 | 78 | ||
80 | /* Rounds 12-15 */ | 79 | /* Rounds 12-15 */ |
81 | movu128 3*16(%eax), MSG3 | ||
82 | pshufb SHUF_MASK, MSG3 | ||
83 | sha1nexte MSG3, E1 | 80 | sha1nexte MSG3, E1 |
84 | mova128 ABCD, E0 | 81 | mova128 ABCD, E0 |
85 | sha1msg2 MSG3, MSG0 | 82 | sha1msg2 MSG3, MSG0 |
@@ -210,16 +207,16 @@ sha1_process_block64_shaNI: | |||
210 | sha1rnds4 $3, E1, ABCD | 207 | sha1rnds4 $3, E1, ABCD |
211 | 208 | ||
212 | /* Add current hash values with previously saved */ | 209 | /* Add current hash values with previously saved */ |
213 | sha1nexte 16(%esp), E0 | 210 | sha1nexte %xmm7, E0 |
214 | paddd (%esp), ABCD | 211 | movu128 (%esp), %xmm7 |
212 | paddd %xmm7, ABCD | ||
215 | 213 | ||
216 | /* Write hash values back in the correct order */ | 214 | /* Write hash values back in the correct order */ |
217 | shuf128_32 $0x1B, ABCD, ABCD | 215 | shuf128_32 $0x1B, ABCD, ABCD |
218 | movu128 ABCD, 76(%eax) | 216 | movu128 ABCD, 76(%eax) |
219 | extr128_32 $3, E0, 76+4*4(%eax) | 217 | extr128_32 $3, E0, 76+4*4(%eax) |
220 | 218 | ||
221 | movl %ebp, %esp | 219 | addl $16, %esp |
222 | popl %ebp | ||
223 | ret | 220 | ret |
224 | .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI | 221 | .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI |
225 | 222 | ||