From c3adbc1c81adde9927d8537128bb9cf20db03c1c Mon Sep 17 00:00:00 2001
From: jsing <>
Date: Fri, 18 Oct 2024 13:36:24 +0000
Subject: Provide crypto_cpu_caps_init() for amd64.

This is a CPU capability detection implementation in C, with minimal
inline assembly (for cpuid and xgetbv). This replaces the assembly
mess generated by x86_64cpuid.pl. Rather than populating OPENSSL_ia32cap_P
directly with CPUID output, just set the bits that the remaining
perlasm checks (namely AESNI, AVX, FXSR, INTEL, HT, MMX, PCLMUL, SSE, SSE2
and SSSE3).

ok joshua@ tb@
---
 src/lib/libcrypto/arch/amd64/Makefile.inc      |  12 +--
 src/lib/libcrypto/arch/amd64/crypto_arch.h     |   4 +-
 src/lib/libcrypto/arch/amd64/crypto_cpu_caps.c | 114 +++++++++++++++++++++++++
 src/lib/libcrypto/x86_arch.h                   |   7 +-
 4 files changed, 126 insertions(+), 11 deletions(-)
 create mode 100644 src/lib/libcrypto/arch/amd64/crypto_cpu_caps.c

diff --git a/src/lib/libcrypto/arch/amd64/Makefile.inc b/src/lib/libcrypto/arch/amd64/Makefile.inc
index dd136f76a7..2f41f44381 100644
--- a/src/lib/libcrypto/arch/amd64/Makefile.inc
+++ b/src/lib/libcrypto/arch/amd64/Makefile.inc
@@ -1,10 +1,12 @@
-# $OpenBSD: Makefile.inc,v 1.29 2024/08/11 13:02:39 jsing Exp $
+# $OpenBSD: Makefile.inc,v 1.30 2024/10/18 13:36:24 jsing Exp $
 
 # amd64-specific libcrypto build rules
 
 # all amd64 code generators use this
 EXTRA_PL =	${LCRYPTO_SRC}/perlasm/x86_64-xlate.pl
 
+SRCS += crypto_cpu_caps.c
+
 # aes
 CFLAGS+= -DAES_ASM
 SSLASM+= aes aes-x86_64
@@ -69,12 +71,4 @@ ${f}.S: ${LCRYPTO_SRC}/${dir}/asm/${f}.pl ${EXTRA_PL}
 		/usr/bin/perl ./asm/${f}.pl openbsd) > ${.TARGET}
 .endfor
 
-CFLAGS+= -DOPENSSL_CPUID_OBJ
-SRCS+=	x86_64cpuid.S
-GENERATED+=x86_64cpuid.S
-
-x86_64cpuid.S: ${LCRYPTO_SRC}/x86_64cpuid.pl ${EXTRA_PL}
-	(cd ${LCRYPTO_SRC}/${dir} ; \
-		/usr/bin/perl ./x86_64cpuid.pl) > ${.TARGET}
-
 CFLAGS+=-fret-clean
diff --git a/src/lib/libcrypto/arch/amd64/crypto_arch.h b/src/lib/libcrypto/arch/amd64/crypto_arch.h
index 8e91c25529..6feeaa209e 100644
--- a/src/lib/libcrypto/arch/amd64/crypto_arch.h
+++ b/src/lib/libcrypto/arch/amd64/crypto_arch.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: crypto_arch.h,v 1.1 2024/08/11 13:02:39 jsing Exp $ */
+/*	$OpenBSD: crypto_arch.h,v 1.2 2024/10/18 13:36:24 jsing Exp $ */
 /*
  * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
  *
@@ -18,6 +18,8 @@
 #ifndef HEADER_CRYPTO_ARCH_H
 #define HEADER_CRYPTO_ARCH_H
 
+#define HAVE_CRYPTO_CPU_CAPS_INIT
+
 #ifndef OPENSSL_NO_ASM
 
 #define HAVE_AES_CBC_ENCRYPT_INTERNAL
diff --git a/src/lib/libcrypto/arch/amd64/crypto_cpu_caps.c b/src/lib/libcrypto/arch/amd64/crypto_cpu_caps.c
new file mode 100644
index 0000000000..8cbf24edbd
--- /dev/null
+++ b/src/lib/libcrypto/arch/amd64/crypto_cpu_caps.c
@@ -0,0 +1,114 @@
+/*	$OpenBSD: crypto_cpu_caps.c,v 1.1 2024/10/18 13:36:24 jsing Exp $ */
+/*
+ * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+
+#include <openssl/crypto.h>
+
+#include "x86_arch.h"
+
+/* Legacy architecture specific capabilities, used by perlasm. */
+extern uint64_t OPENSSL_ia32cap_P;
+
+/* Machine independent CPU capabilities. */
+extern uint64_t crypto_cpu_caps;
+
+static inline void
+cpuid(uint32_t eax, uint32_t *out_eax, uint32_t *out_ebx, uint32_t *out_ecx,
+    uint32_t *out_edx)
+{
+	uint32_t ebx = 0, ecx = 0, edx = 0;
+	/* Run CPUID for leaf eax (ecx starts at 0); no asm yields all zeros. */
+#ifndef OPENSSL_NO_ASM
+	__asm__ ("cpuid": "+a"(eax), "+b"(ebx), "+c"(ecx), "+d"(edx));
+#else
+	eax = 0;
+#endif
+	/* Every output pointer is optional: a NULL one is simply skipped. */
+	if (out_eax != NULL)
+		*out_eax = eax;
+	if (out_ebx != NULL)
+		*out_ebx = ebx;
+	if (out_ecx != NULL)
+		*out_ecx = ecx;
+	if (out_edx != NULL)
+		*out_edx = edx;
+}
+
+static inline void
+xgetbv(uint32_t ecx, uint32_t *out_eax, uint32_t *out_edx)
+{
+	uint32_t lo = 0, hi = 0;
+	/* XGETBV with ecx as selector; both halves stay zero without asm. */
+#ifndef OPENSSL_NO_ASM
+	__asm__ ("xgetbv": "+a"(lo), "+c"(ecx), "+d"(hi));
+#endif
+	/* Hand back only the halves the caller asked for (NULL is skipped). */
+	if (out_eax != NULL)
+		*out_eax = lo;
+	if (out_edx != NULL)
+		*out_edx = hi;
+}
+
+void
+crypto_cpu_caps_init(void)
+{
+	uint32_t a, b, c, d, xcr0;
+	uint64_t ia32cap = 0;
+
+	/* Leaf 0: a = highest basic leaf, b/d/c = vendor string. */
+	cpuid(0, &a, &b, &c, &d);
+
+	/* "GenuineIntel" in little endian. */
+	if (b == 0x756e6547 && d == 0x49656e69 && c == 0x6c65746e)
+		ia32cap |= CPUCAP_MASK_INTEL;
+
+	/* No leaf 1: bail out, leaving OPENSSL_ia32cap_P untouched. */
+	if (a < 1)
+		return;
+
+	/* Leaf 1: feature flags arrive in d (EDX) and c (ECX). */
+	cpuid(1, &a, &b, &c, &d);
+
+	/* Translate each CPUID feature bit into its OPENSSL_ia32cap_P bit. */
+	ia32cap |= (d & IA32CAP_MASK0_FXSR) != 0 ? CPUCAP_MASK_FXSR : 0;
+	ia32cap |= (d & IA32CAP_MASK0_HT) != 0 ? CPUCAP_MASK_HT : 0;
+	ia32cap |= (d & IA32CAP_MASK0_MMX) != 0 ? CPUCAP_MASK_MMX : 0;
+	ia32cap |= (d & IA32CAP_MASK0_SSE) != 0 ? CPUCAP_MASK_SSE : 0;
+	ia32cap |= (d & IA32CAP_MASK0_SSE2) != 0 ? CPUCAP_MASK_SSE2 : 0;
+
+	ia32cap |= (c & IA32CAP_MASK1_AESNI) != 0 ? CPUCAP_MASK_AESNI : 0;
+	ia32cap |= (c & IA32CAP_MASK1_PCLMUL) != 0 ? CPUCAP_MASK_PCLMUL : 0;
+	ia32cap |= (c & IA32CAP_MASK1_SSSE3) != 0 ? CPUCAP_MASK_SSSE3 : 0;
+
+	/*
+	 * AVX requires OSXSAVE and XMM/YMM state to be enabled. xgetbv is
+	 * only executed once OSXSAVE is reported; bits 1 and 2 of its low
+	 * word must both be set.
+	 */
+	if ((c & IA32CAP_MASK1_OSXSAVE) != 0) {
+		xgetbv(0, &xcr0, NULL);
+		if ((xcr0 & 0x6) == 0x6 && (c & IA32CAP_MASK1_AVX) != 0)
+			ia32cap |= CPUCAP_MASK_AVX;
+	}
+
+	/* Set machine independent CPU capabilities. */
+	if ((ia32cap & CPUCAP_MASK_AESNI) != 0)
+		crypto_cpu_caps |= CRYPTO_CPU_CAPS_ACCELERATED_AES;
+
+	OPENSSL_ia32cap_P = ia32cap;
+}
diff --git a/src/lib/libcrypto/x86_arch.h b/src/lib/libcrypto/x86_arch.h
index 5b2cf97546..e9e9d48960 100644
--- a/src/lib/libcrypto/x86_arch.h
+++ b/src/lib/libcrypto/x86_arch.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: x86_arch.h,v 1.1 2016/11/04 17:30:30 miod Exp $	*/
+/*	$OpenBSD: x86_arch.h,v 1.2 2024/10/18 13:36:24 jsing Exp $	*/
 /*
  * Copyright (c) 2016 Miodrag Vallat.
  *
@@ -76,15 +76,20 @@
 #define	IA32CAP_MASK1_SSSE3	(1 << IA32CAP_BIT1_SSSE3)
 #define	IA32CAP_MASK1_FMA3	(1 << IA32CAP_BIT1_FMA3)
 #define	IA32CAP_MASK1_AESNI	(1 << IA32CAP_BIT1_AESNI)
+#define	IA32CAP_MASK1_OSXSAVE	(1 << IA32CAP_BIT1_OSXSAVE)
 #define	IA32CAP_MASK1_AVX	(1 << IA32CAP_BIT1_AVX)
 
 #define	IA32CAP_MASK1_AMD_XOP	(1 << IA32CAP_BIT1_AMD_XOP)
 
 /* bit masks for OPENSSL_cpu_caps() */
+#define	CPUCAP_MASK_HT		IA32CAP_MASK0_HT
 #define	CPUCAP_MASK_MMX		IA32CAP_MASK0_MMX
 #define	CPUCAP_MASK_FXSR	IA32CAP_MASK0_FXSR
 #define	CPUCAP_MASK_SSE		IA32CAP_MASK0_SSE
+#define	CPUCAP_MASK_SSE2	IA32CAP_MASK0_SSE2
+#define	CPUCAP_MASK_INTEL	IA32CAP_MASK0_INTEL
 #define	CPUCAP_MASK_INTELP4	IA32CAP_MASK0_INTELP4
 #define	CPUCAP_MASK_PCLMUL	(1ULL << (32 + IA32CAP_BIT1_PCLMUL))
 #define	CPUCAP_MASK_SSSE3	(1ULL << (32 + IA32CAP_BIT1_SSSE3))
 #define	CPUCAP_MASK_AESNI	(1ULL << (32 + IA32CAP_BIT1_AESNI))
+#define	CPUCAP_MASK_AVX		(1ULL << (32 + IA32CAP_BIT1_AVX))
-- 
cgit v1.2.3-55-g6feb