diff options
Diffstat (limited to 'libbb/hash_md5_sha_x86-64_shaNI.S')
-rw-r--r-- | libbb/hash_md5_sha_x86-64_shaNI.S | 11 |
1 files changed, 8 insertions, 3 deletions
diff --git a/libbb/hash_md5_sha_x86-64_shaNI.S b/libbb/hash_md5_sha_x86-64_shaNI.S index b32029360..794e97040 100644 --- a/libbb/hash_md5_sha_x86-64_shaNI.S +++ b/libbb/hash_md5_sha_x86-64_shaNI.S | |||
@@ -4,7 +4,7 @@ | |||
4 | // We use shorter insns, even though they are for "wrong" | 4 | // We use shorter insns, even though they are for "wrong" |
5 | // data type (fp, not int). | 5 | // data type (fp, not int). |
6 | // For Intel, there is no penalty at all for doing it | 6 | // For Intel, there is no penalty at all for doing it |
7 | // (CPUs which do have such penalty do not support SHA1 insns). | 7 | // (CPUs which do have such penalty do not support SHA insns). |
8 | // For AMD, the penalty is one extra cycle | 8 | // For AMD, the penalty is one extra cycle |
9 | // (allegedly: I failed to find a measurable difference). | 9 | // (allegedly: I failed to find a measurable difference). |
10 | 10 | ||
@@ -20,6 +20,11 @@ | |||
20 | #define extr128_32 pextrd | 20 | #define extr128_32 pextrd |
21 | //#define extr128_32 extractps # not shorter | 21 | //#define extr128_32 extractps # not shorter |
22 | 22 | ||
23 | // pshufb is an SSSE3 insn. | ||
24 | // pinsrd, pextrd, extractps are SSE4.1 insns. | ||
25 | // We do not check SSSE3/SSE4.1 in cpuid, | ||
26 | // all SHA-capable CPUs support them as well. | ||
27 | |||
23 | .section .text.sha1_process_block64_shaNI, "ax", @progbits | 28 | .section .text.sha1_process_block64_shaNI, "ax", @progbits |
24 | .globl sha1_process_block64_shaNI | 29 | .globl sha1_process_block64_shaNI |
25 | .hidden sha1_process_block64_shaNI | 30 | .hidden sha1_process_block64_shaNI |
@@ -217,8 +222,8 @@ sha1_process_block64_shaNI: | |||
217 | .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI | 222 | .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI |
218 | 223 | ||
219 | .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 | 224 | .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 |
220 | .balign 16 | 225 | .balign 16 |
221 | PSHUFFLE_BYTE_FLIP_MASK: | 226 | PSHUFFLE_BYTE_FLIP_MASK: |
222 | .octa 0x000102030405060708090a0b0c0d0e0f | 227 | .octa 0x000102030405060708090a0b0c0d0e0f |
223 | 228 | ||
224 | #endif | 229 | #endif |