aboutsummaryrefslogtreecommitdiff
path: root/libbb/hash_md5_sha_x86-64_shaNI.S
diff options
context:
space:
mode:
Diffstat (limited to 'libbb/hash_md5_sha_x86-64_shaNI.S')
-rw-r--r-- libbb/hash_md5_sha_x86-64_shaNI.S 11
1 files changed, 8 insertions, 3 deletions
diff --git a/libbb/hash_md5_sha_x86-64_shaNI.S b/libbb/hash_md5_sha_x86-64_shaNI.S
index b32029360..794e97040 100644
--- a/libbb/hash_md5_sha_x86-64_shaNI.S
+++ b/libbb/hash_md5_sha_x86-64_shaNI.S
@@ -4,7 +4,7 @@
4// We use shorter insns, even though they are for "wrong" 4// We use shorter insns, even though they are for "wrong"
5// data type (fp, not int). 5// data type (fp, not int).
6// For Intel, there is no penalty for doing it at all 6// For Intel, there is no penalty for doing it at all
7// (CPUs which do have such penalty do not support SHA1 insns). 7// (CPUs which do have such penalty do not support SHA insns).
8// For AMD, the penalty is one extra cycle 8// For AMD, the penalty is one extra cycle
9// (allegedly: I failed to find measurable difference). 9// (allegedly: I failed to find measurable difference).
10 10
@@ -20,6 +20,11 @@
20#define extr128_32 pextrd 20#define extr128_32 pextrd
21//#define extr128_32 extractps # not shorter 21//#define extr128_32 extractps # not shorter
22 22
23// pshufb is an SSSE3 insn.
24// pinsrd, pextrd, extractps are SSE4.1 insns.
25// We do not check for SSSE3/SSE4.1 in cpuid:
26// all SHA-capable CPUs support them as well.
27
23 .section .text.sha1_process_block64_shaNI, "ax", @progbits 28 .section .text.sha1_process_block64_shaNI, "ax", @progbits
24 .globl sha1_process_block64_shaNI 29 .globl sha1_process_block64_shaNI
25 .hidden sha1_process_block64_shaNI 30 .hidden sha1_process_block64_shaNI
@@ -217,8 +222,8 @@ sha1_process_block64_shaNI:
217 .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI 222 .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI
218 223
219 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 224 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
220 .balign 16 225 .balign 16
221PSHUFFLE_BYTE_FLIP_MASK: 226PSHUFFLE_BYTE_FLIP_MASK:
222 .octa 0x000102030405060708090a0b0c0d0e0f 227 .octa 0x000102030405060708090a0b0c0d0e0f
223 228
224#endif 229#endif