aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2022-02-08 08:20:27 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2022-02-08 08:22:17 +0100
commit: 71a1cccaad679bd102f87283f78c581a8fb0e255 (patch)
tree: bfd33cfa7b1f31aedebecfe10fb920515f6f5eae
parent: 4923f74e5873b25b8205a4059964cff75ee731a8 (diff)
download: busybox-w32-71a1cccaad679bd102f87283f78c581a8fb0e255.tar.gz
          busybox-w32-71a1cccaad679bd102f87283f78c581a8fb0e255.tar.bz2
          busybox-w32-71a1cccaad679bd102f87283f78c581a8fb0e255.zip
libbb/sha1: shrink x86 hardware accelerated hashing
function                                 old     new   delta
sha1_process_block64_shaNI 32-bit        524     517      -7
sha1_process_block64_shaNI 64-bit        510     508      -2

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--  libbb/hash_md5_sha_x86-32_shaNI.S  37
-rw-r--r--  libbb/hash_md5_sha_x86-64_shaNI.S  24
2 files changed, 29 insertions, 32 deletions
diff --git a/libbb/hash_md5_sha_x86-32_shaNI.S b/libbb/hash_md5_sha_x86-32_shaNI.S
index 5d082ebfb..0f3fe57ca 100644
--- a/libbb/hash_md5_sha_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha_x86-32_shaNI.S
@@ -32,14 +32,10 @@
32#define MSG1 %xmm4 32#define MSG1 %xmm4
33#define MSG2 %xmm5 33#define MSG2 %xmm5
34#define MSG3 %xmm6 34#define MSG3 %xmm6
35#define SHUF_MASK %xmm7
36 35
37 .balign 8 # allow decoders to fetch at least 3 first insns 36 .balign 8 # allow decoders to fetch at least 2 first insns
38sha1_process_block64_shaNI: 37sha1_process_block64_shaNI:
39 pushl %ebp 38 subl $16, %esp
40 movl %esp, %ebp
41 subl $32, %esp
42 andl $~0xF, %esp # paddd needs aligned memory operand
43 39
44 /* load initial hash values */ 40 /* load initial hash values */
45 xor128 E0, E0 41 xor128 E0, E0
@@ -47,30 +43,33 @@ sha1_process_block64_shaNI:
47 pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word 43 pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word
48 shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD 44 shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD
49 45
50 mova128 PSHUFFLE_BYTE_FLIP_MASK, SHUF_MASK 46 mova128 PSHUFFLE_BYTE_FLIP_MASK, %xmm7
47
48 movu128 0*16(%eax), MSG0
49 pshufb %xmm7, MSG0
50 movu128 1*16(%eax), MSG1
51 pshufb %xmm7, MSG1
52 movu128 2*16(%eax), MSG2
53 pshufb %xmm7, MSG2
54 movu128 3*16(%eax), MSG3
55 pshufb %xmm7, MSG3
51 56
52 /* Save hash values for addition after rounds */ 57 /* Save hash values for addition after rounds */
53 movu128 E0, 16(%esp) 58 movu128 E0, %xmm7
54 movu128 ABCD, (%esp) 59 movu128 ABCD, (%esp)
55 60
56 /* Rounds 0-3 */ 61 /* Rounds 0-3 */
57 movu128 0*16(%eax), MSG0
58 pshufb SHUF_MASK, MSG0
59 paddd MSG0, E0 62 paddd MSG0, E0
60 mova128 ABCD, E1 63 mova128 ABCD, E1
61 sha1rnds4 $0, E0, ABCD 64 sha1rnds4 $0, E0, ABCD
62 65
63 /* Rounds 4-7 */ 66 /* Rounds 4-7 */
64 movu128 1*16(%eax), MSG1
65 pshufb SHUF_MASK, MSG1
66 sha1nexte MSG1, E1 67 sha1nexte MSG1, E1
67 mova128 ABCD, E0 68 mova128 ABCD, E0
68 sha1rnds4 $0, E1, ABCD 69 sha1rnds4 $0, E1, ABCD
69 sha1msg1 MSG1, MSG0 70 sha1msg1 MSG1, MSG0
70 71
71 /* Rounds 8-11 */ 72 /* Rounds 8-11 */
72 movu128 2*16(%eax), MSG2
73 pshufb SHUF_MASK, MSG2
74 sha1nexte MSG2, E0 73 sha1nexte MSG2, E0
75 mova128 ABCD, E1 74 mova128 ABCD, E1
76 sha1rnds4 $0, E0, ABCD 75 sha1rnds4 $0, E0, ABCD
@@ -78,8 +77,6 @@ sha1_process_block64_shaNI:
78 xor128 MSG2, MSG0 77 xor128 MSG2, MSG0
79 78
80 /* Rounds 12-15 */ 79 /* Rounds 12-15 */
81 movu128 3*16(%eax), MSG3
82 pshufb SHUF_MASK, MSG3
83 sha1nexte MSG3, E1 80 sha1nexte MSG3, E1
84 mova128 ABCD, E0 81 mova128 ABCD, E0
85 sha1msg2 MSG3, MSG0 82 sha1msg2 MSG3, MSG0
@@ -210,16 +207,16 @@ sha1_process_block64_shaNI:
210 sha1rnds4 $3, E1, ABCD 207 sha1rnds4 $3, E1, ABCD
211 208
212 /* Add current hash values with previously saved */ 209 /* Add current hash values with previously saved */
213 sha1nexte 16(%esp), E0 210 sha1nexte %xmm7, E0
214 paddd (%esp), ABCD 211 movu128 (%esp), %xmm7
212 paddd %xmm7, ABCD
215 213
216 /* Write hash values back in the correct order */ 214 /* Write hash values back in the correct order */
217 shuf128_32 $0x1B, ABCD, ABCD 215 shuf128_32 $0x1B, ABCD, ABCD
218 movu128 ABCD, 76(%eax) 216 movu128 ABCD, 76(%eax)
219 extr128_32 $3, E0, 76+4*4(%eax) 217 extr128_32 $3, E0, 76+4*4(%eax)
220 218
221 movl %ebp, %esp 219 addl $16, %esp
222 popl %ebp
223 ret 220 ret
224 .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI 221 .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI
225 222
diff --git a/libbb/hash_md5_sha_x86-64_shaNI.S b/libbb/hash_md5_sha_x86-64_shaNI.S
index 8ddec87ce..fc2ca92e8 100644
--- a/libbb/hash_md5_sha_x86-64_shaNI.S
+++ b/libbb/hash_md5_sha_x86-64_shaNI.S
@@ -32,7 +32,6 @@
32#define MSG1 %xmm4 32#define MSG1 %xmm4
33#define MSG2 %xmm5 33#define MSG2 %xmm5
34#define MSG3 %xmm6 34#define MSG3 %xmm6
35#define SHUF_MASK %xmm7
36 35
37 .balign 8 # allow decoders to fetch at least 2 first insns 36 .balign 8 # allow decoders to fetch at least 2 first insns
38sha1_process_block64_shaNI: 37sha1_process_block64_shaNI:
@@ -43,30 +42,33 @@ sha1_process_block64_shaNI:
43 pinsrd $3, 80+4*4(%rdi), E0 # load to uppermost 32-bit word 42 pinsrd $3, 80+4*4(%rdi), E0 # load to uppermost 32-bit word
44 shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD 43 shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD
45 44
46 mova128 PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK 45 mova128 PSHUFFLE_BYTE_FLIP_MASK(%rip), %xmm7
46
47 movu128 0*16(%rdi), MSG0
48 pshufb %xmm7, MSG0
49 movu128 1*16(%rdi), MSG1
50 pshufb %xmm7, MSG1
51 movu128 2*16(%rdi), MSG2
52 pshufb %xmm7, MSG2
53 movu128 3*16(%rdi), MSG3
54 pshufb %xmm7, MSG3
47 55
48 /* Save hash values for addition after rounds */ 56 /* Save hash values for addition after rounds */
49 mova128 E0, %xmm9 57 mova128 E0, %xmm7
50 mova128 ABCD, %xmm8 58 mova128 ABCD, %xmm8
51 59
52 /* Rounds 0-3 */ 60 /* Rounds 0-3 */
53 movu128 0*16(%rdi), MSG0
54 pshufb SHUF_MASK, MSG0
55 paddd MSG0, E0 61 paddd MSG0, E0
56 mova128 ABCD, E1 62 mova128 ABCD, E1
57 sha1rnds4 $0, E0, ABCD 63 sha1rnds4 $0, E0, ABCD
58 64
59 /* Rounds 4-7 */ 65 /* Rounds 4-7 */
60 movu128 1*16(%rdi), MSG1
61 pshufb SHUF_MASK, MSG1
62 sha1nexte MSG1, E1 66 sha1nexte MSG1, E1
63 mova128 ABCD, E0 67 mova128 ABCD, E0
64 sha1rnds4 $0, E1, ABCD 68 sha1rnds4 $0, E1, ABCD
65 sha1msg1 MSG1, MSG0 69 sha1msg1 MSG1, MSG0
66 70
67 /* Rounds 8-11 */ 71 /* Rounds 8-11 */
68 movu128 2*16(%rdi), MSG2
69 pshufb SHUF_MASK, MSG2
70 sha1nexte MSG2, E0 72 sha1nexte MSG2, E0
71 mova128 ABCD, E1 73 mova128 ABCD, E1
72 sha1rnds4 $0, E0, ABCD 74 sha1rnds4 $0, E0, ABCD
@@ -74,8 +76,6 @@ sha1_process_block64_shaNI:
74 xor128 MSG2, MSG0 76 xor128 MSG2, MSG0
75 77
76 /* Rounds 12-15 */ 78 /* Rounds 12-15 */
77 movu128 3*16(%rdi), MSG3
78 pshufb SHUF_MASK, MSG3
79 sha1nexte MSG3, E1 79 sha1nexte MSG3, E1
80 mova128 ABCD, E0 80 mova128 ABCD, E0
81 sha1msg2 MSG3, MSG0 81 sha1msg2 MSG3, MSG0
@@ -206,7 +206,7 @@ sha1_process_block64_shaNI:
206 sha1rnds4 $3, E1, ABCD 206 sha1rnds4 $3, E1, ABCD
207 207
208 /* Add current hash values with previously saved */ 208 /* Add current hash values with previously saved */
209 sha1nexte %xmm9, E0 209 sha1nexte %xmm7, E0
210 paddd %xmm8, ABCD 210 paddd %xmm8, ABCD
211 211
212 /* Write hash values back in the correct order */ 212 /* Write hash values back in the correct order */