summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorjsing <>2023-05-28 17:42:30 +0000
committerjsing <>2023-05-28 17:42:30 +0000
commite074f5c2c19e5324112455428b2f4b66b012e9a6 (patch)
tree9baaeb2679c5b08aa46278d3bdc878bfdfdf2e61 /src
parent75bb8d3034d8e40b76dfa1377325b829cc58a395 (diff)
downloadopenbsd-e074f5c2c19e5324112455428b2f4b66b012e9a6.tar.gz
openbsd-e074f5c2c19e5324112455428b2f4b66b012e9a6.tar.bz2
openbsd-e074f5c2c19e5324112455428b2f4b66b012e9a6.zip
Provide optimised bn_mulw_{addw,addw_addw,addtw}() for aarch64.
This results in bn_mul_comba4() and bn_mul_comba8() requiring ~30% fewer instructions than they did previously.
Diffstat (limited to 'src')
-rw-r--r--src/lib/libcrypto/bn/arch/aarch64/bn_arch.h69
1 files changed, 68 insertions, 1 deletions
diff --git a/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h b/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h
index 1b9358e710..708083aaf2 100644
--- a/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h
+++ b/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.h,v 1.8 2023/05/28 17:22:04 jsing Exp $ */ 1/* $OpenBSD: bn_arch.h,v 1.9 2023/05/28 17:42:30 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -81,6 +81,73 @@ bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
81 *out_r0 = r0; 81 *out_r0 = r0;
82} 82}
83 83
84#define HAVE_BN_MULW_ADDW
85
/*
 * bn_mulw_addw() computes (r1:r0) = a * b + c using aarch64 inline assembly.
 * The high word of the result is stored via out_r1 and the low word via
 * out_r0.
 *
 * For w-bit words the result always fits in the double word, since
 * (2^w - 1)^2 + (2^w - 1) < 2^(2w).
 *
 * NOTE(review): assumes BN_ULONG is a 64-bit type on this architecture, so
 * that umulh/mul operate on full X registers - confirm against the BN_ULONG
 * definition.
 */
86static inline void
87bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
88 BN_ULONG *out_r0)
89{
90 BN_ULONG r1, r0;
91
/*
 * umulh produces the upper half of a * b and mul the lower half. adds then
 * adds c to the low word (setting the carry flag) and adc propagates that
 * carry into the high word.
 *
 * Both outputs are early-clobber (&): r1 is written by the first instruction
 * while a, b and c are still to be read, so the outputs must not share
 * registers with the inputs. The adds instruction modifies the condition
 * flags, hence the "cc" clobber.
 */
92 __asm__ (
93 "umulh %[r1], %[a], %[b] \n"
94 "mul %[r0], %[a], %[b] \n"
95 "adds %[r0], %[r0], %[c] \n"
96 "adc %[r1], %[r1], xzr \n"
97 : [r1]"=&r"(r1), [r0]"=&r"(r0)
98 : [a]"r"(a), [b]"r"(b), [c]"r"(c)
99 : "cc");
100
101 *out_r1 = r1;
102 *out_r0 = r0;
103}
104
105#define HAVE_BN_MULW_ADDW_ADDW
106
/*
 * bn_mulw_addw_addw() computes (r1:r0) = a * b + c + d using aarch64 inline
 * assembly. The high word of the result is stored via out_r1 and the low word
 * via out_r0.
 *
 * For w-bit words the result always fits in the double word, since
 * (2^w - 1)^2 + 2 * (2^w - 1) = 2^(2w) - 1.
 *
 * NOTE(review): assumes BN_ULONG is a 64-bit type on this architecture -
 * confirm against the BN_ULONG definition.
 */
107static inline void
108bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
109 BN_ULONG *out_r1, BN_ULONG *out_r0)
110{
111 BN_ULONG r1, r0;
112
/*
 * umulh/mul form the double word product a * b. Each adds/adc pair then adds
 * one single word addend (first c, then d) to the low word and propagates the
 * resulting carry into the high word.
 *
 * Both outputs are early-clobber (&) because they are written before all of
 * the inputs have been read. The adds instructions modify the condition
 * flags, hence the "cc" clobber.
 */
113 __asm__ (
114 "umulh %[r1], %[a], %[b] \n"
115 "mul %[r0], %[a], %[b] \n"
116 "adds %[r0], %[r0], %[c] \n"
117 "adc %[r1], %[r1], xzr \n"
118 "adds %[r0], %[r0], %[d] \n"
119 "adc %[r1], %[r1], xzr \n"
120 : [r1]"=&r"(r1), [r0]"=&r"(r0)
121 : [a]"r"(a), [b]"r"(b), [c]"r"(c), [d]"r"(d)
122 : "cc");
123
124 *out_r1 = r1;
125 *out_r0 = r0;
126}
127
128#define HAVE_BN_MULW_ADDTW
129
/*
 * bn_mulw_addtw() computes (r2:r1:r0) = a * b + (c2:c1:c0) using aarch64
 * inline assembly, i.e. it adds a double word product to a triple word value.
 * The result words are stored via out_r2 (most significant), out_r1 and
 * out_r0 (least significant).
 *
 * Any carry out of the most significant word is discarded, as the final adc
 * does not set the flags and nothing consumes a further carry.
 *
 * NOTE(review): assumes BN_ULONG is a 64-bit type on this architecture -
 * confirm against the BN_ULONG definition.
 */
130static inline void
131bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
132 BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
133{
134 BN_ULONG r2, r1, r0;
135
/*
 * umulh/mul form the double word product a * b in r1:r0. adds adds c0 to the
 * low word, adcs adds c1 plus the carry to the middle word, and adc sets r2
 * to c2 plus the carry out of the middle word.
 *
 * The outputs are early-clobber (&) because r1 and r0 are written before all
 * of the inputs have been read. The adds/adcs instructions modify the
 * condition flags, hence the "cc" clobber.
 */
136 __asm__ (
137 "umulh %[r1], %[a], %[b] \n"
138 "mul %[r0], %[a], %[b] \n"
139 "adds %[r0], %[r0], %[c0] \n"
140 "adcs %[r1], %[r1], %[c1] \n"
141 "adc %[r2], xzr, %[c2] \n"
142 : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0)
143 : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
144 : "cc");
145
146 *out_r2 = r2;
147 *out_r1 = r1;
148 *out_r0 = r0;
149}
150
84#define HAVE_BN_SUBW 151#define HAVE_BN_SUBW
85 152
86static inline void 153static inline void