From 1c5e77a1d6f97589e2bca622f3313c1418f9a535 Mon Sep 17 00:00:00 2001 From: jsing <> Date: Fri, 7 Jul 2023 16:10:32 +0000 Subject: Provide optimised bn_mulw() for riscv64. This provides a 1.5-2x performance gain for BN multiplication, with a similar improvement being seen for RSA operations. --- src/lib/libcrypto/bn/arch/riscv64/bn_arch.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/lib/libcrypto/bn/arch/riscv64/bn_arch.h b/src/lib/libcrypto/bn/arch/riscv64/bn_arch.h index 354774cde3..66256acad0 100644 --- a/src/lib/libcrypto/bn/arch/riscv64/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/riscv64/bn_arch.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_arch.h,v 1.4 2023/02/16 10:41:03 jsing Exp $ */ +/* $OpenBSD: bn_arch.h,v 1.5 2023/07/07 16:10:32 jsing Exp $ */ /* * Copyright (c) 2023 Joel Sing * @@ -15,15 +15,17 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include <openssl/bn.h> + #ifndef HEADER_BN_ARCH_H #define HEADER_BN_ARCH_H #ifndef OPENSSL_NO_ASM -#if 0 /* Needs testing and enabling. */ #if defined(__GNUC__) -#define HAVE_BN_MULW +#define HAVE_BN_MULW + static inline void bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0) { @@ -34,15 +36,17 @@ bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0) * of these instructions is important, as they can potentially be fused * into a single operation. */ - __asm__ ("mulh %0, %2, %3; mul %1, %2, %3" - : "=&r"(r1), "=r"(r0) - : "r"(a), "r"(b)); + __asm__ ( + "mulhu %[r1], %[a], %[b] \n" + "mul %[r0], %[a], %[b] \n" + : [r1]"=&r"(r1), [r0]"=r"(r0) + : [a]"r"(a), [b]"r"(b)); *out_r1 = r1; *out_r0 = r0; } + #endif /* __GNUC__ */ -#endif #endif #endif -- cgit v1.2.3-55-g6feb