diff options
| author | jsing <> | 2024-10-18 13:36:24 +0000 |
|---|---|---|
| committer | jsing <> | 2024-10-18 13:36:24 +0000 |
| commit | c3adbc1c81adde9927d8537128bb9cf20db03c1c (patch) | |
| tree | 9f24e49a607732139a986f4abb3735d101835046 /src | |
| parent | 1a7fd7787292c0192e0f8e7889d78be38bf8c9c2 (diff) | |
| download | openbsd-c3adbc1c81adde9927d8537128bb9cf20db03c1c.tar.gz openbsd-c3adbc1c81adde9927d8537128bb9cf20db03c1c.tar.bz2 openbsd-c3adbc1c81adde9927d8537128bb9cf20db03c1c.zip | |
Provide crypto_cpu_caps_init() for amd64.
This is a CPU capability detection implementation in C, with minimal
inline assembly (for cpuid and xgetbv). This replaces the assembly
mess generated by x86_64cpuid.pl. Rather than populating OPENSSL_ia32cap_P
directly with CPUID output, just set the bits that the remaining
perlasm checks (namely AESNI, AVX, FXSR, INTEL, HT, MMX, PCLMUL, SSE, SSE2
and SSSE3).
ok joshua@ tb@
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib/libcrypto/arch/amd64/Makefile.inc | 12 | ||||
| -rw-r--r-- | src/lib/libcrypto/arch/amd64/crypto_arch.h | 4 | ||||
| -rw-r--r-- | src/lib/libcrypto/arch/amd64/crypto_cpu_caps.c | 114 | ||||
| -rw-r--r-- | src/lib/libcrypto/x86_arch.h | 7 |
4 files changed, 126 insertions, 11 deletions
diff --git a/src/lib/libcrypto/arch/amd64/Makefile.inc b/src/lib/libcrypto/arch/amd64/Makefile.inc index dd136f76a7..2f41f44381 100644 --- a/src/lib/libcrypto/arch/amd64/Makefile.inc +++ b/src/lib/libcrypto/arch/amd64/Makefile.inc | |||
| @@ -1,10 +1,12 @@ | |||
| 1 | # $OpenBSD: Makefile.inc,v 1.29 2024/08/11 13:02:39 jsing Exp $ | 1 | # $OpenBSD: Makefile.inc,v 1.30 2024/10/18 13:36:24 jsing Exp $ |
| 2 | 2 | ||
| 3 | # amd64-specific libcrypto build rules | 3 | # amd64-specific libcrypto build rules |
| 4 | 4 | ||
| 5 | # all amd64 code generators use this | 5 | # all amd64 code generators use this |
| 6 | EXTRA_PL = ${LCRYPTO_SRC}/perlasm/x86_64-xlate.pl | 6 | EXTRA_PL = ${LCRYPTO_SRC}/perlasm/x86_64-xlate.pl |
| 7 | 7 | ||
| 8 | SRCS += crypto_cpu_caps.c | ||
| 9 | |||
| 8 | # aes | 10 | # aes |
| 9 | CFLAGS+= -DAES_ASM | 11 | CFLAGS+= -DAES_ASM |
| 10 | SSLASM+= aes aes-x86_64 | 12 | SSLASM+= aes aes-x86_64 |
| @@ -69,12 +71,4 @@ ${f}.S: ${LCRYPTO_SRC}/${dir}/asm/${f}.pl ${EXTRA_PL} | |||
| 69 | /usr/bin/perl ./asm/${f}.pl openbsd) > ${.TARGET} | 71 | /usr/bin/perl ./asm/${f}.pl openbsd) > ${.TARGET} |
| 70 | .endfor | 72 | .endfor |
| 71 | 73 | ||
| 72 | CFLAGS+= -DOPENSSL_CPUID_OBJ | ||
| 73 | SRCS+= x86_64cpuid.S | ||
| 74 | GENERATED+=x86_64cpuid.S | ||
| 75 | |||
| 76 | x86_64cpuid.S: ${LCRYPTO_SRC}/x86_64cpuid.pl ${EXTRA_PL} | ||
| 77 | (cd ${LCRYPTO_SRC}/${dir} ; \ | ||
| 78 | /usr/bin/perl ./x86_64cpuid.pl) > ${.TARGET} | ||
| 79 | |||
| 80 | CFLAGS+=-fret-clean | 74 | CFLAGS+=-fret-clean |
diff --git a/src/lib/libcrypto/arch/amd64/crypto_arch.h b/src/lib/libcrypto/arch/amd64/crypto_arch.h index 8e91c25529..6feeaa209e 100644 --- a/src/lib/libcrypto/arch/amd64/crypto_arch.h +++ b/src/lib/libcrypto/arch/amd64/crypto_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: crypto_arch.h,v 1.1 2024/08/11 13:02:39 jsing Exp $ */ | 1 | /* $OpenBSD: crypto_arch.h,v 1.2 2024/10/18 13:36:24 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -18,6 +18,8 @@ | |||
| 18 | #ifndef HEADER_CRYPTO_ARCH_H | 18 | #ifndef HEADER_CRYPTO_ARCH_H |
| 19 | #define HEADER_CRYPTO_ARCH_H | 19 | #define HEADER_CRYPTO_ARCH_H |
| 20 | 20 | ||
| 21 | #define HAVE_CRYPTO_CPU_CAPS_INIT | ||
| 22 | |||
| 21 | #ifndef OPENSSL_NO_ASM | 23 | #ifndef OPENSSL_NO_ASM |
| 22 | 24 | ||
| 23 | #define HAVE_AES_CBC_ENCRYPT_INTERNAL | 25 | #define HAVE_AES_CBC_ENCRYPT_INTERNAL |
diff --git a/src/lib/libcrypto/arch/amd64/crypto_cpu_caps.c b/src/lib/libcrypto/arch/amd64/crypto_cpu_caps.c new file mode 100644 index 0000000000..8cbf24edbd --- /dev/null +++ b/src/lib/libcrypto/arch/amd64/crypto_cpu_caps.c | |||
| @@ -0,0 +1,114 @@ | |||
| 1 | /* $OpenBSD: crypto_cpu_caps.c,v 1.1 2024/10/18 13:36:24 jsing Exp $ */ | ||
| 2 | /* | ||
| 3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> | ||
| 4 | * | ||
| 5 | * Permission to use, copy, modify, and distribute this software for any | ||
| 6 | * purpose with or without fee is hereby granted, provided that the above | ||
| 7 | * copyright notice and this permission notice appear in all copies. | ||
| 8 | * | ||
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <stdio.h> | ||
| 19 | |||
| 20 | #include <openssl/crypto.h> | ||
| 21 | |||
| 22 | #include "x86_arch.h" | ||
| 23 | |||
| 24 | /* Legacy architecture specific capabilities, used by perlasm. */ | ||
| 25 | extern uint64_t OPENSSL_ia32cap_P; | ||
| 26 | |||
| 27 | /* Machine independent CPU capabilities. */ | ||
| 28 | extern uint64_t crypto_cpu_caps; | ||
| 29 | |||
/*
 * Execute the CPUID instruction for the leaf passed in eax and hand the
 * resulting register values back through the out_* pointers. ECX (the
 * sub-leaf selector) is always zero on input. Any of the out_* pointers may
 * be NULL, in which case the corresponding register value is discarded.
 *
 * When built with OPENSSL_NO_ASM no instruction is executed and all four
 * registers are reported as zero.
 */
static inline void
cpuid(uint32_t eax, uint32_t *out_eax, uint32_t *out_ebx, uint32_t *out_ecx,
    uint32_t *out_edx)
{
	uint32_t ebx = 0, ecx = 0, edx = 0;

#ifndef OPENSSL_NO_ASM
	__asm__ ("cpuid": "+a"(eax), "+b"(ebx), "+c"(ecx), "+d"(edx));
#else
	eax = 0;
#endif

	if (out_eax != NULL)
		*out_eax = eax;
	if (out_ebx != NULL)
		*out_ebx = ebx;
	/* Each output is guarded by its own pointer, never by a sibling's. */
	if (out_ecx != NULL)
		*out_ecx = ecx;
	if (out_edx != NULL)
		*out_edx = edx;
}
| 51 | |||
/*
 * Execute XGETBV for the extended control register selected by ecx and
 * return the two 32-bit result registers (EAX and EDX) through out_eax and
 * out_edx. Either output pointer may be NULL to discard that register.
 *
 * When built with OPENSSL_NO_ASM no instruction is executed and both
 * results are reported as zero.
 */
static inline void
xgetbv(uint32_t ecx, uint32_t *out_eax, uint32_t *out_edx)
{
	uint32_t reg_a = 0;
	uint32_t reg_d = 0;

#ifndef OPENSSL_NO_ASM
	__asm__ ("xgetbv": "+a"(reg_a), "+c"(ecx), "+d"(reg_d));
#endif

	if (out_edx != NULL)
		*out_edx = reg_d;
	if (out_eax != NULL)
		*out_eax = reg_a;
}
| 66 | |||
| 67 | void | ||
| 68 | crypto_cpu_caps_init(void) | ||
| 69 | { | ||
| 70 | uint32_t eax, ebx, ecx, edx; | ||
| 71 | uint64_t caps = 0; | ||
| 72 | |||
| 73 | cpuid(0, &eax, &ebx, &ecx, &edx); | ||
| 74 | |||
| 75 | /* "GenuineIntel" in little endian. */ | ||
| 76 | if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) | ||
| 77 | caps |= CPUCAP_MASK_INTEL; | ||
| 78 | |||
| 79 | if (eax < 1) | ||
| 80 | return; | ||
| 81 | |||
| 82 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
| 83 | |||
| 84 | if ((edx & IA32CAP_MASK0_FXSR) != 0) | ||
| 85 | caps |= CPUCAP_MASK_FXSR; | ||
| 86 | if ((edx & IA32CAP_MASK0_HT) != 0) | ||
| 87 | caps |= CPUCAP_MASK_HT; | ||
| 88 | if ((edx & IA32CAP_MASK0_MMX) != 0) | ||
| 89 | caps |= CPUCAP_MASK_MMX; | ||
| 90 | if ((edx & IA32CAP_MASK0_SSE) != 0) | ||
| 91 | caps |= CPUCAP_MASK_SSE; | ||
| 92 | if ((edx & IA32CAP_MASK0_SSE2) != 0) | ||
| 93 | caps |= CPUCAP_MASK_SSE2; | ||
| 94 | |||
| 95 | if ((ecx & IA32CAP_MASK1_AESNI) != 0) | ||
| 96 | caps |= CPUCAP_MASK_AESNI; | ||
| 97 | if ((ecx & IA32CAP_MASK1_PCLMUL) != 0) | ||
| 98 | caps |= CPUCAP_MASK_PCLMUL; | ||
| 99 | if ((ecx & IA32CAP_MASK1_SSSE3) != 0) | ||
| 100 | caps |= CPUCAP_MASK_SSSE3; | ||
| 101 | |||
| 102 | /* AVX requires OSXSAVE and XMM/YMM state to be enabled. */ | ||
| 103 | if ((ecx & IA32CAP_MASK1_OSXSAVE) != 0) { | ||
| 104 | xgetbv(0, &eax, NULL); | ||
| 105 | if (((eax >> 1) & 3) == 3 && (ecx & IA32CAP_MASK1_AVX) != 0) | ||
| 106 | caps |= CPUCAP_MASK_AVX; | ||
| 107 | } | ||
| 108 | |||
| 109 | /* Set machine independent CPU capabilities. */ | ||
| 110 | if ((caps & CPUCAP_MASK_AESNI) != 0) | ||
| 111 | crypto_cpu_caps |= CRYPTO_CPU_CAPS_ACCELERATED_AES; | ||
| 112 | |||
| 113 | OPENSSL_ia32cap_P = caps; | ||
| 114 | } | ||
diff --git a/src/lib/libcrypto/x86_arch.h b/src/lib/libcrypto/x86_arch.h index 5b2cf97546..e9e9d48960 100644 --- a/src/lib/libcrypto/x86_arch.h +++ b/src/lib/libcrypto/x86_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: x86_arch.h,v 1.1 2016/11/04 17:30:30 miod Exp $ */ | 1 | /* $OpenBSD: x86_arch.h,v 1.2 2024/10/18 13:36:24 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2016 Miodrag Vallat. | 3 | * Copyright (c) 2016 Miodrag Vallat. |
| 4 | * | 4 | * |
| @@ -76,15 +76,20 @@ | |||
| 76 | #define IA32CAP_MASK1_SSSE3 (1 << IA32CAP_BIT1_SSSE3) | 76 | #define IA32CAP_MASK1_SSSE3 (1 << IA32CAP_BIT1_SSSE3) |
| 77 | #define IA32CAP_MASK1_FMA3 (1 << IA32CAP_BIT1_FMA3) | 77 | #define IA32CAP_MASK1_FMA3 (1 << IA32CAP_BIT1_FMA3) |
| 78 | #define IA32CAP_MASK1_AESNI (1 << IA32CAP_BIT1_AESNI) | 78 | #define IA32CAP_MASK1_AESNI (1 << IA32CAP_BIT1_AESNI) |
| 79 | #define IA32CAP_MASK1_OSXSAVE (1 << IA32CAP_BIT1_OSXSAVE) | ||
| 79 | #define IA32CAP_MASK1_AVX (1 << IA32CAP_BIT1_AVX) | 80 | #define IA32CAP_MASK1_AVX (1 << IA32CAP_BIT1_AVX) |
| 80 | 81 | ||
| 81 | #define IA32CAP_MASK1_AMD_XOP (1 << IA32CAP_BIT1_AMD_XOP) | 82 | #define IA32CAP_MASK1_AMD_XOP (1 << IA32CAP_BIT1_AMD_XOP) |
| 82 | 83 | ||
| 83 | /* bit masks for OPENSSL_cpu_caps() */ | 84 | /* bit masks for OPENSSL_cpu_caps() */ |
| 85 | #define CPUCAP_MASK_HT IA32CAP_MASK0_HT | ||
| 84 | #define CPUCAP_MASK_MMX IA32CAP_MASK0_MMX | 86 | #define CPUCAP_MASK_MMX IA32CAP_MASK0_MMX |
| 85 | #define CPUCAP_MASK_FXSR IA32CAP_MASK0_FXSR | 87 | #define CPUCAP_MASK_FXSR IA32CAP_MASK0_FXSR |
| 86 | #define CPUCAP_MASK_SSE IA32CAP_MASK0_SSE | 88 | #define CPUCAP_MASK_SSE IA32CAP_MASK0_SSE |
| 89 | #define CPUCAP_MASK_SSE2 IA32CAP_MASK0_SSE2 | ||
| 90 | #define CPUCAP_MASK_INTEL IA32CAP_MASK0_INTEL | ||
| 87 | #define CPUCAP_MASK_INTELP4 IA32CAP_MASK0_INTELP4 | 91 | #define CPUCAP_MASK_INTELP4 IA32CAP_MASK0_INTELP4 |
| 88 | #define CPUCAP_MASK_PCLMUL (1ULL << (32 + IA32CAP_BIT1_PCLMUL)) | 92 | #define CPUCAP_MASK_PCLMUL (1ULL << (32 + IA32CAP_BIT1_PCLMUL)) |
| 89 | #define CPUCAP_MASK_SSSE3 (1ULL << (32 + IA32CAP_BIT1_SSSE3)) | 93 | #define CPUCAP_MASK_SSSE3 (1ULL << (32 + IA32CAP_BIT1_SSSE3)) |
| 90 | #define CPUCAP_MASK_AESNI (1ULL << (32 + IA32CAP_BIT1_AESNI)) | 94 | #define CPUCAP_MASK_AESNI (1ULL << (32 + IA32CAP_BIT1_AESNI)) |
| 95 | #define CPUCAP_MASK_AVX (1ULL << (32 + IA32CAP_BIT1_AVX)) | ||
