diff options
author | jsing <> | 2023-05-28 17:42:30 +0000 |
---|---|---|
committer | jsing <> | 2023-05-28 17:42:30 +0000 |
commit | e074f5c2c19e5324112455428b2f4b66b012e9a6 (patch) | |
tree | 9baaeb2679c5b08aa46278d3bdc878bfdfdf2e61 /src | |
parent | 75bb8d3034d8e40b76dfa1377325b829cc58a395 (diff) | |
download | openbsd-e074f5c2c19e5324112455428b2f4b66b012e9a6.tar.gz openbsd-e074f5c2c19e5324112455428b2f4b66b012e9a6.tar.bz2 openbsd-e074f5c2c19e5324112455428b2f4b66b012e9a6.zip |
Provide optimised bn_mulw_{addw,addw_addw,addtw}() for aarch64.
This results in bn_mul_comba4() and bn_mul_comba8() requiring ~30% fewer
instructions than they did previously.
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libcrypto/bn/arch/aarch64/bn_arch.h | 69 |
1 files changed, 68 insertions, 1 deletions
diff --git a/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h b/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h index 1b9358e710..708083aaf2 100644 --- a/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_arch.h,v 1.8 2023/05/28 17:22:04 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.9 2023/05/28 17:42:30 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -81,6 +81,73 @@ bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0) | |||
81 | *out_r0 = r0; | 81 | *out_r0 = r0; |
82 | } | 82 | } |
83 | 83 | ||
84 | #define HAVE_BN_MULW_ADDW | |
85 | |
/*
 * bn_mulw_addw() computes a * b + c and stores the double word result as
 * (*out_r1:*out_r0), where *out_r1 receives the high word and *out_r0 the
 * low word. The result always fits in two words: the largest possible
 * product plus the largest possible single word addend is still below the
 * double word maximum.
 *
 * Both outputs use the early-clobber constraint ("=&r") because they are
 * written before every input has been read (r1 is set by umulh while a and
 * b are still needed by mul), so they must not share a register with an
 * input. "cc" is listed as clobbered because adds updates the condition
 * flags.
 */
86 | static inline void | |
87 | bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1, | |
88 | BN_ULONG *out_r0) | |
89 | { | |
90 | BN_ULONG r1, r0; | |
91 | |
92 | __asm__ ( | |
/* r1 = high word of the product a * b. */
93 | "umulh %[r1], %[a], %[b] \n" | |
/* r0 = low word of the product a * b. */
94 | "mul %[r0], %[a], %[b] \n" | |
/* r0 += c, recording any overflow in the carry flag. */
95 | "adds %[r0], %[r0], %[c] \n" | |
/* r1 += carry (xzr is the zero register). */
96 | "adc %[r1], %[r1], xzr \n" | |
97 | : [r1]"=&r"(r1), [r0]"=&r"(r0) | |
98 | : [a]"r"(a), [b]"r"(b), [c]"r"(c) | |
99 | : "cc"); | |
100 | |
101 | *out_r1 = r1; | |
102 | *out_r0 = r0; | |
103 | } | |
104 | |||
105 | #define HAVE_BN_MULW_ADDW_ADDW | |
106 | |
/*
 * bn_mulw_addw_addw() computes a * b + c + d and stores the double word
 * result as (*out_r1:*out_r0), where *out_r1 receives the high word and
 * *out_r0 the low word. The result always fits in two words: the largest
 * possible product plus two maximal single word addends is exactly the
 * double word maximum.
 *
 * Each single word addition is followed by its own carry propagation into
 * the high word, since the carry flag only holds the overflow of the most
 * recent adds.
 *
 * Both outputs use the early-clobber constraint ("=&r") because they are
 * written before every input has been read, so they must not share a
 * register with an input. "cc" is listed as clobbered because adds updates
 * the condition flags.
 */
107 | static inline void | |
108 | bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d, | |
109 | BN_ULONG *out_r1, BN_ULONG *out_r0) | |
110 | { | |
111 | BN_ULONG r1, r0; | |
112 | |
113 | __asm__ ( | |
/* r1 = high word of the product a * b. */
114 | "umulh %[r1], %[a], %[b] \n" | |
/* r0 = low word of the product a * b. */
115 | "mul %[r0], %[a], %[b] \n" | |
/* r0 += c, then fold the resulting carry into r1. */
116 | "adds %[r0], %[r0], %[c] \n" | |
117 | "adc %[r1], %[r1], xzr \n" | |
/* r0 += d, then fold the resulting carry into r1. */
118 | "adds %[r0], %[r0], %[d] \n" | |
119 | "adc %[r1], %[r1], xzr \n" | |
120 | : [r1]"=&r"(r1), [r0]"=&r"(r0) | |
121 | : [a]"r"(a), [b]"r"(b), [c]"r"(c), [d]"r"(d) | |
122 | : "cc"); | |
123 | |
124 | *out_r1 = r1; | |
125 | *out_r0 = r0; | |
126 | } | |
127 | |||
128 | #define HAVE_BN_MULW_ADDTW | |
129 | |
/*
 * bn_mulw_addtw() computes a * b + (c2:c1:c0), i.e. it adds the double word
 * product of a and b to the triple word value whose words are c2 (high),
 * c1 (middle) and c0 (low). The triple word result is stored as
 * (*out_r2:*out_r1:*out_r0), highest word first.
 *
 * The carry is chained through all three words with adds/adcs/adc. The
 * final adc does not report a carry out of the top word, so any overflow
 * beyond three words is discarded.
 *
 * All outputs use the early-clobber constraint ("=&r") so that none of them
 * is allocated to the same register as an input that is still to be read.
 * "cc" is listed as clobbered because adds and adcs update the condition
 * flags.
 */
130 | static inline void | |
131 | bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, | |
132 | BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0) | |
133 | { | |
134 | BN_ULONG r2, r1, r0; | |
135 | |
136 | __asm__ ( | |
/* r1 = high word of the product a * b. */
137 | "umulh %[r1], %[a], %[b] \n" | |
/* r0 = low word of the product a * b. */
138 | "mul %[r0], %[a], %[b] \n" | |
/* r0 += c0, recording any overflow in the carry flag. */
139 | "adds %[r0], %[r0], %[c0] \n" | |
/* r1 += c1 + carry, again updating the carry flag. */
140 | "adcs %[r1], %[r1], %[c1] \n" | |
/* r2 = c2 + carry (the product contributes nothing to the top word). */
141 | "adc %[r2], xzr, %[c2] \n" | |
142 | : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0) | |
143 | : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0) | |
144 | : "cc"); | |
145 | |
146 | *out_r2 = r2; | |
147 | *out_r1 = r1; | |
148 | *out_r0 = r0; | |
149 | } | |
150 | |||
84 | #define HAVE_BN_SUBW | 151 | #define HAVE_BN_SUBW |
85 | 152 | ||
86 | static inline void | 153 | static inline void |