diff options
| author | jsing <> | 2023-05-28 17:42:30 +0000 |
|---|---|---|
| committer | jsing <> | 2023-05-28 17:42:30 +0000 |
| commit | 847bb6ade26d4856fd05909787fd82194b263387 (patch) | |
| tree | 9baaeb2679c5b08aa46278d3bdc878bfdfdf2e61 /src | |
| parent | 3b16059743e4e2f0b44353e1ae9dd45339145f76 (diff) | |
| download | openbsd-847bb6ade26d4856fd05909787fd82194b263387.tar.gz openbsd-847bb6ade26d4856fd05909787fd82194b263387.tar.bz2 openbsd-847bb6ade26d4856fd05909787fd82194b263387.zip | |
Provide optimised bn_mulw_{addw,addw_addw,addtw}() for aarch64.
This results in bn_mul_comba4() and bn_mul_comba8() requiring ~30% fewer
instructions than they did previously.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib/libcrypto/bn/arch/aarch64/bn_arch.h | 69 |
1 files changed, 68 insertions, 1 deletions
diff --git a/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h b/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h index 1b9358e710..708083aaf2 100644 --- a/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/aarch64/bn_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.8 2023/05/28 17:22:04 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.9 2023/05/28 17:42:30 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -81,6 +81,73 @@ bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0) | |||
| 81 | *out_r0 = r0; | 81 | *out_r0 = r0; |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | #define HAVE_BN_MULW_ADDW | ||
| 85 | |||
| 86 | static inline void | ||
| 87 | bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1, | ||
| 88 | BN_ULONG *out_r0) | ||
| 89 | { | ||
| 90 | BN_ULONG r1, r0; | ||
| 91 | |||
| 92 | __asm__ ( | ||
| 93 | "umulh %[r1], %[a], %[b] \n" | ||
| 94 | "mul %[r0], %[a], %[b] \n" | ||
| 95 | "adds %[r0], %[r0], %[c] \n" | ||
| 96 | "adc %[r1], %[r1], xzr \n" | ||
| 97 | : [r1]"=&r"(r1), [r0]"=&r"(r0) | ||
| 98 | : [a]"r"(a), [b]"r"(b), [c]"r"(c) | ||
| 99 | : "cc"); | ||
| 100 | |||
| 101 | *out_r1 = r1; | ||
| 102 | *out_r0 = r0; | ||
| 103 | } | ||
| 104 | |||
| 105 | #define HAVE_BN_MULW_ADDW_ADDW | ||
| 106 | |||
| 107 | static inline void | ||
| 108 | bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d, | ||
| 109 | BN_ULONG *out_r1, BN_ULONG *out_r0) | ||
| 110 | { | ||
| 111 | BN_ULONG r1, r0; | ||
| 112 | |||
| 113 | __asm__ ( | ||
| 114 | "umulh %[r1], %[a], %[b] \n" | ||
| 115 | "mul %[r0], %[a], %[b] \n" | ||
| 116 | "adds %[r0], %[r0], %[c] \n" | ||
| 117 | "adc %[r1], %[r1], xzr \n" | ||
| 118 | "adds %[r0], %[r0], %[d] \n" | ||
| 119 | "adc %[r1], %[r1], xzr \n" | ||
| 120 | : [r1]"=&r"(r1), [r0]"=&r"(r0) | ||
| 121 | : [a]"r"(a), [b]"r"(b), [c]"r"(c), [d]"r"(d) | ||
| 122 | : "cc"); | ||
| 123 | |||
| 124 | *out_r1 = r1; | ||
| 125 | *out_r0 = r0; | ||
| 126 | } | ||
| 127 | |||
| 128 | #define HAVE_BN_MULW_ADDTW | ||
| 129 | |||
| 130 | static inline void | ||
| 131 | bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, | ||
| 132 | BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0) | ||
| 133 | { | ||
| 134 | BN_ULONG r2, r1, r0; | ||
| 135 | |||
| 136 | __asm__ ( | ||
| 137 | "umulh %[r1], %[a], %[b] \n" | ||
| 138 | "mul %[r0], %[a], %[b] \n" | ||
| 139 | "adds %[r0], %[r0], %[c0] \n" | ||
| 140 | "adcs %[r1], %[r1], %[c1] \n" | ||
| 141 | "adc %[r2], xzr, %[c2] \n" | ||
| 142 | : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0) | ||
| 143 | : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0) | ||
| 144 | : "cc"); | ||
| 145 | |||
| 146 | *out_r2 = r2; | ||
| 147 | *out_r1 = r1; | ||
| 148 | *out_r0 = r0; | ||
| 149 | } | ||
| 150 | |||
| 84 | #define HAVE_BN_SUBW | 151 | #define HAVE_BN_SUBW |
| 85 | 152 | ||
| 86 | static inline void | 153 | static inline void |
