diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-08 08:20:27 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-08 08:22:17 +0100 |
commit | 71a1cccaad679bd102f87283f78c581a8fb0e255 (patch) | |
tree | bfd33cfa7b1f31aedebecfe10fb920515f6f5eae | |
parent | 4923f74e5873b25b8205a4059964cff75ee731a8 (diff) | |
download | busybox-w32-71a1cccaad679bd102f87283f78c581a8fb0e255.tar.gz busybox-w32-71a1cccaad679bd102f87283f78c581a8fb0e255.tar.bz2 busybox-w32-71a1cccaad679bd102f87283f78c581a8fb0e255.zip |
libbb/sha1: shrink x86 hardware accelerated hashing
function                             old   new   delta
sha1_process_block64_shaNI 32-bit    524   517      -7
sha1_process_block64_shaNI 64-bit    510   508      -2
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/hash_md5_sha_x86-32_shaNI.S | 37 | ||||
-rw-r--r-- | libbb/hash_md5_sha_x86-64_shaNI.S | 24 |
2 files changed, 29 insertions, 32 deletions
diff --git a/libbb/hash_md5_sha_x86-32_shaNI.S b/libbb/hash_md5_sha_x86-32_shaNI.S index 5d082ebfb..0f3fe57ca 100644 --- a/libbb/hash_md5_sha_x86-32_shaNI.S +++ b/libbb/hash_md5_sha_x86-32_shaNI.S | |||
@@ -32,14 +32,10 @@ | |||
32 | #define MSG1 %xmm4 | 32 | #define MSG1 %xmm4 |
33 | #define MSG2 %xmm5 | 33 | #define MSG2 %xmm5 |
34 | #define MSG3 %xmm6 | 34 | #define MSG3 %xmm6 |
35 | #define SHUF_MASK %xmm7 | ||
36 | 35 | ||
37 | .balign 8 # allow decoders to fetch at least 3 first insns | 36 | .balign 8 # allow decoders to fetch at least 2 first insns |
38 | sha1_process_block64_shaNI: | 37 | sha1_process_block64_shaNI: |
39 | pushl %ebp | 38 | subl $16, %esp |
40 | movl %esp, %ebp | ||
41 | subl $32, %esp | ||
42 | andl $~0xF, %esp # paddd needs aligned memory operand | ||
43 | 39 | ||
44 | /* load initial hash values */ | 40 | /* load initial hash values */ |
45 | xor128 E0, E0 | 41 | xor128 E0, E0 |
@@ -47,30 +43,33 @@ sha1_process_block64_shaNI: | |||
47 | pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word | 43 | pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word |
48 | shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD | 44 | shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD |
49 | 45 | ||
50 | mova128 PSHUFFLE_BYTE_FLIP_MASK, SHUF_MASK | 46 | mova128 PSHUFFLE_BYTE_FLIP_MASK, %xmm7 |
47 | |||
48 | movu128 0*16(%eax), MSG0 | ||
49 | pshufb %xmm7, MSG0 | ||
50 | movu128 1*16(%eax), MSG1 | ||
51 | pshufb %xmm7, MSG1 | ||
52 | movu128 2*16(%eax), MSG2 | ||
53 | pshufb %xmm7, MSG2 | ||
54 | movu128 3*16(%eax), MSG3 | ||
55 | pshufb %xmm7, MSG3 | ||
51 | 56 | ||
52 | /* Save hash values for addition after rounds */ | 57 | /* Save hash values for addition after rounds */ |
53 | movu128 E0, 16(%esp) | 58 | movu128 E0, %xmm7 |
54 | movu128 ABCD, (%esp) | 59 | movu128 ABCD, (%esp) |
55 | 60 | ||
56 | /* Rounds 0-3 */ | 61 | /* Rounds 0-3 */ |
57 | movu128 0*16(%eax), MSG0 | ||
58 | pshufb SHUF_MASK, MSG0 | ||
59 | paddd MSG0, E0 | 62 | paddd MSG0, E0 |
60 | mova128 ABCD, E1 | 63 | mova128 ABCD, E1 |
61 | sha1rnds4 $0, E0, ABCD | 64 | sha1rnds4 $0, E0, ABCD |
62 | 65 | ||
63 | /* Rounds 4-7 */ | 66 | /* Rounds 4-7 */ |
64 | movu128 1*16(%eax), MSG1 | ||
65 | pshufb SHUF_MASK, MSG1 | ||
66 | sha1nexte MSG1, E1 | 67 | sha1nexte MSG1, E1 |
67 | mova128 ABCD, E0 | 68 | mova128 ABCD, E0 |
68 | sha1rnds4 $0, E1, ABCD | 69 | sha1rnds4 $0, E1, ABCD |
69 | sha1msg1 MSG1, MSG0 | 70 | sha1msg1 MSG1, MSG0 |
70 | 71 | ||
71 | /* Rounds 8-11 */ | 72 | /* Rounds 8-11 */ |
72 | movu128 2*16(%eax), MSG2 | ||
73 | pshufb SHUF_MASK, MSG2 | ||
74 | sha1nexte MSG2, E0 | 73 | sha1nexte MSG2, E0 |
75 | mova128 ABCD, E1 | 74 | mova128 ABCD, E1 |
76 | sha1rnds4 $0, E0, ABCD | 75 | sha1rnds4 $0, E0, ABCD |
@@ -78,8 +77,6 @@ sha1_process_block64_shaNI: | |||
78 | xor128 MSG2, MSG0 | 77 | xor128 MSG2, MSG0 |
79 | 78 | ||
80 | /* Rounds 12-15 */ | 79 | /* Rounds 12-15 */ |
81 | movu128 3*16(%eax), MSG3 | ||
82 | pshufb SHUF_MASK, MSG3 | ||
83 | sha1nexte MSG3, E1 | 80 | sha1nexte MSG3, E1 |
84 | mova128 ABCD, E0 | 81 | mova128 ABCD, E0 |
85 | sha1msg2 MSG3, MSG0 | 82 | sha1msg2 MSG3, MSG0 |
@@ -210,16 +207,16 @@ sha1_process_block64_shaNI: | |||
210 | sha1rnds4 $3, E1, ABCD | 207 | sha1rnds4 $3, E1, ABCD |
211 | 208 | ||
212 | /* Add current hash values with previously saved */ | 209 | /* Add current hash values with previously saved */ |
213 | sha1nexte 16(%esp), E0 | 210 | sha1nexte %xmm7, E0 |
214 | paddd (%esp), ABCD | 211 | movu128 (%esp), %xmm7 |
212 | paddd %xmm7, ABCD | ||
215 | 213 | ||
216 | /* Write hash values back in the correct order */ | 214 | /* Write hash values back in the correct order */ |
217 | shuf128_32 $0x1B, ABCD, ABCD | 215 | shuf128_32 $0x1B, ABCD, ABCD |
218 | movu128 ABCD, 76(%eax) | 216 | movu128 ABCD, 76(%eax) |
219 | extr128_32 $3, E0, 76+4*4(%eax) | 217 | extr128_32 $3, E0, 76+4*4(%eax) |
220 | 218 | ||
221 | movl %ebp, %esp | 219 | addl $16, %esp |
222 | popl %ebp | ||
223 | ret | 220 | ret |
224 | .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI | 221 | .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI |
225 | 222 | ||
diff --git a/libbb/hash_md5_sha_x86-64_shaNI.S b/libbb/hash_md5_sha_x86-64_shaNI.S index 8ddec87ce..fc2ca92e8 100644 --- a/libbb/hash_md5_sha_x86-64_shaNI.S +++ b/libbb/hash_md5_sha_x86-64_shaNI.S | |||
@@ -32,7 +32,6 @@ | |||
32 | #define MSG1 %xmm4 | 32 | #define MSG1 %xmm4 |
33 | #define MSG2 %xmm5 | 33 | #define MSG2 %xmm5 |
34 | #define MSG3 %xmm6 | 34 | #define MSG3 %xmm6 |
35 | #define SHUF_MASK %xmm7 | ||
36 | 35 | ||
37 | .balign 8 # allow decoders to fetch at least 2 first insns | 36 | .balign 8 # allow decoders to fetch at least 2 first insns |
38 | sha1_process_block64_shaNI: | 37 | sha1_process_block64_shaNI: |
@@ -43,30 +42,33 @@ sha1_process_block64_shaNI: | |||
43 | pinsrd $3, 80+4*4(%rdi), E0 # load to uppermost 32-bit word | 42 | pinsrd $3, 80+4*4(%rdi), E0 # load to uppermost 32-bit word |
44 | shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD | 43 | shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD |
45 | 44 | ||
46 | mova128 PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK | 45 | mova128 PSHUFFLE_BYTE_FLIP_MASK(%rip), %xmm7 |
46 | |||
47 | movu128 0*16(%rdi), MSG0 | ||
48 | pshufb %xmm7, MSG0 | ||
49 | movu128 1*16(%rdi), MSG1 | ||
50 | pshufb %xmm7, MSG1 | ||
51 | movu128 2*16(%rdi), MSG2 | ||
52 | pshufb %xmm7, MSG2 | ||
53 | movu128 3*16(%rdi), MSG3 | ||
54 | pshufb %xmm7, MSG3 | ||
47 | 55 | ||
48 | /* Save hash values for addition after rounds */ | 56 | /* Save hash values for addition after rounds */ |
49 | mova128 E0, %xmm9 | 57 | mova128 E0, %xmm7 |
50 | mova128 ABCD, %xmm8 | 58 | mova128 ABCD, %xmm8 |
51 | 59 | ||
52 | /* Rounds 0-3 */ | 60 | /* Rounds 0-3 */ |
53 | movu128 0*16(%rdi), MSG0 | ||
54 | pshufb SHUF_MASK, MSG0 | ||
55 | paddd MSG0, E0 | 61 | paddd MSG0, E0 |
56 | mova128 ABCD, E1 | 62 | mova128 ABCD, E1 |
57 | sha1rnds4 $0, E0, ABCD | 63 | sha1rnds4 $0, E0, ABCD |
58 | 64 | ||
59 | /* Rounds 4-7 */ | 65 | /* Rounds 4-7 */ |
60 | movu128 1*16(%rdi), MSG1 | ||
61 | pshufb SHUF_MASK, MSG1 | ||
62 | sha1nexte MSG1, E1 | 66 | sha1nexte MSG1, E1 |
63 | mova128 ABCD, E0 | 67 | mova128 ABCD, E0 |
64 | sha1rnds4 $0, E1, ABCD | 68 | sha1rnds4 $0, E1, ABCD |
65 | sha1msg1 MSG1, MSG0 | 69 | sha1msg1 MSG1, MSG0 |
66 | 70 | ||
67 | /* Rounds 8-11 */ | 71 | /* Rounds 8-11 */ |
68 | movu128 2*16(%rdi), MSG2 | ||
69 | pshufb SHUF_MASK, MSG2 | ||
70 | sha1nexte MSG2, E0 | 72 | sha1nexte MSG2, E0 |
71 | mova128 ABCD, E1 | 73 | mova128 ABCD, E1 |
72 | sha1rnds4 $0, E0, ABCD | 74 | sha1rnds4 $0, E0, ABCD |
@@ -74,8 +76,6 @@ sha1_process_block64_shaNI: | |||
74 | xor128 MSG2, MSG0 | 76 | xor128 MSG2, MSG0 |
75 | 77 | ||
76 | /* Rounds 12-15 */ | 78 | /* Rounds 12-15 */ |
77 | movu128 3*16(%rdi), MSG3 | ||
78 | pshufb SHUF_MASK, MSG3 | ||
79 | sha1nexte MSG3, E1 | 79 | sha1nexte MSG3, E1 |
80 | mova128 ABCD, E0 | 80 | mova128 ABCD, E0 |
81 | sha1msg2 MSG3, MSG0 | 81 | sha1msg2 MSG3, MSG0 |
@@ -206,7 +206,7 @@ sha1_process_block64_shaNI: | |||
206 | sha1rnds4 $3, E1, ABCD | 206 | sha1rnds4 $3, E1, ABCD |
207 | 207 | ||
208 | /* Add current hash values with previously saved */ | 208 | /* Add current hash values with previously saved */ |
209 | sha1nexte %xmm9, E0 | 209 | sha1nexte %xmm7, E0 |
210 | paddd %xmm8, ABCD | 210 | paddd %xmm8, ABCD |
211 | 211 | ||
212 | /* Write hash values back in the correct order */ | 212 | /* Write hash values back in the correct order */ |