diff options
| author | jsing <> | 2023-02-14 18:37:15 +0000 |
|---|---|---|
| committer | jsing <> | 2023-02-14 18:37:15 +0000 |
| commit | b73d4dabf0827fb6841d16a8928fea6b7438742b (patch) | |
| tree | 2b56d52a18947785680f3ae1e0724a413228058f | |
| parent | 5f8f1614f88870fb8337b56253e6079518e33848 (diff) | |
| download | openbsd-b73d4dabf0827fb6841d16a8928fea6b7438742b.tar.gz openbsd-b73d4dabf0827fb6841d16a8928fea6b7438742b.tar.bz2 openbsd-b73d4dabf0827fb6841d16a8928fea6b7438742b.zip | |
Reimplement bn_mul_words(), bn_mul_add_words() and bn_mul_comba{4,8}().

Use bignum primitives rather than the current mess of macros, which also
allows us to remove the essentially duplicate versions of
bn_mul_words() and bn_mul_add_words() for BN_LLONG.

The "mul" macro gets replaced by bn_mulw_addw(), "mul_add" with
bn_mulw_addw_addw() and "mul_add_c" with bn_mulw_addtw() (where 'w'
indicates single word input and 'tw' indicates triple word input).

The variables in the comba functions have also been reordered, so that the
patterns are easier to understand - the compiler can take care of
optimising the inputs and outputs to avoid register moves.

ok tb@
| -rw-r--r-- | src/lib/libcrypto/bn/bn_mul.c | 387 |
1 file changed, 152 insertions, 235 deletions
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
index 38c01dad18..965c1ad036 100644
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ b/src/lib/libcrypto/bn/bn_mul.c
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_mul.c,v 1.31 2023/02/13 04:25:37 jsing Exp $ */ | 1 | /* $OpenBSD: bn_mul.c,v 1.32 2023/02/14 18:37:15 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -63,293 +63,210 @@ | |||
| 63 | #include <openssl/opensslconf.h> | 63 | #include <openssl/opensslconf.h> |
| 64 | 64 | ||
| 65 | #include "bn_arch.h" | 65 | #include "bn_arch.h" |
| 66 | #include "bn_internal.h" | ||
| 66 | #include "bn_local.h" | 67 | #include "bn_local.h" |
| 67 | 68 | ||
| 69 | /* | ||
| 70 | * bn_mul_add_words() computes (carry:r[i]) = a[i] * w + r[i] + carry, where | ||
| 71 | * a is an array of words and w is a single word. This should really be called | ||
| 72 | * bn_mulw_add_words() since only one input is an array. This is used as a step | ||
| 73 | * in the multiplication of word arrays. | ||
| 74 | */ | ||
| 68 | #ifndef HAVE_BN_MUL_ADD_WORDS | 75 | #ifndef HAVE_BN_MUL_ADD_WORDS |
| 69 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) | ||
| 70 | |||
| 71 | BN_ULONG | 76 | BN_ULONG |
| 72 | bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | 77 | bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) |
| 73 | { | 78 | { |
| 74 | BN_ULONG c1 = 0; | 79 | BN_ULONG carry = 0; |
| 75 | 80 | ||
| 76 | assert(num >= 0); | 81 | assert(num >= 0); |
| 77 | if (num <= 0) | 82 | if (num <= 0) |
| 78 | return (c1); | 83 | return 0; |
| 79 | 84 | ||
| 80 | #ifndef OPENSSL_SMALL_FOOTPRINT | 85 | #ifndef OPENSSL_SMALL_FOOTPRINT |
| 81 | while (num & ~3) { | 86 | while (num & ~3) { |
| 82 | mul_add(rp[0], ap[0], w, c1); | 87 | bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); |
| 83 | mul_add(rp[1], ap[1], w, c1); | 88 | bn_mulw_addw_addw(a[1], w, r[1], carry, &carry, &r[1]); |
| 84 | mul_add(rp[2], ap[2], w, c1); | 89 | bn_mulw_addw_addw(a[2], w, r[2], carry, &carry, &r[2]); |
| 85 | mul_add(rp[3], ap[3], w, c1); | 90 | bn_mulw_addw_addw(a[3], w, r[3], carry, &carry, &r[3]); |
| 86 | ap += 4; | 91 | a += 4; |
| 87 | rp += 4; | 92 | r += 4; |
| 88 | num -= 4; | 93 | num -= 4; |
| 89 | } | 94 | } |
| 90 | #endif | 95 | #endif |
| 91 | while (num) { | 96 | while (num) { |
| 92 | mul_add(rp[0], ap[0], w, c1); | 97 | bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); |
| 93 | ap++; | 98 | a++; |
| 94 | rp++; | 99 | r++; |
| 95 | num--; | 100 | num--; |
| 96 | } | 101 | } |
| 97 | 102 | ||
| 98 | return (c1); | 103 | return carry; |
| 99 | } | 104 | } |
| 105 | #endif | ||
| 100 | 106 | ||
| 101 | #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ | 107 | /* |
| 102 | 108 | * bn_mul_comba4() computes r[] = a[] * b[] using Comba multiplication | |
| 103 | BN_ULONG | 109 | * (https://everything2.com/title/Comba+multiplication), where a and b are both |
| 104 | bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | 110 | * four word arrays, producing an eight word array result. |
| 111 | */ | ||
| 112 | #ifndef HAVE_BN_MUL_COMBA4 | ||
| 113 | void | ||
| 114 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
| 105 | { | 115 | { |
| 106 | BN_ULONG c = 0; | 116 | BN_ULONG c0, c1, c2; |
| 107 | BN_ULONG bl, bh; | ||
| 108 | 117 | ||
| 109 | assert(num >= 0); | 118 | bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]); |
| 110 | if (num <= 0) | ||
| 111 | return ((BN_ULONG)0); | ||
| 112 | 119 | ||
| 113 | bl = LBITS(w); | 120 | bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0); |
| 114 | bh = HBITS(w); | 121 | bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]); |
| 115 | 122 | ||
| 116 | #ifndef OPENSSL_SMALL_FOOTPRINT | 123 | bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0); |
| 117 | while (num & ~3) { | 124 | bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0); |
| 118 | mul_add(rp[0], ap[0], bl, bh, c); | 125 | bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]); |
| 119 | mul_add(rp[1], ap[1], bl, bh, c); | ||
| 120 | mul_add(rp[2], ap[2], bl, bh, c); | ||
| 121 | mul_add(rp[3], ap[3], bl, bh, c); | ||
| 122 | ap += 4; | ||
| 123 | rp += 4; | ||
| 124 | num -= 4; | ||
| 125 | } | ||
| 126 | #endif | ||
| 127 | while (num) { | ||
| 128 | mul_add(rp[0], ap[0], bl, bh, c); | ||
| 129 | ap++; | ||
| 130 | rp++; | ||
| 131 | num--; | ||
| 132 | } | ||
| 133 | return (c); | ||
| 134 | } | ||
| 135 | 126 | ||
| 136 | #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ | 127 | bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0); |
| 137 | #endif | 128 | bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0); |
| 129 | bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0); | ||
| 130 | bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]); | ||
| 138 | 131 | ||
| 139 | #ifndef HAVE_BN_MUL_COMBA4 | 132 | bn_mulw_addtw(a[3], b[1], 0, c2, c1, &c2, &c1, &c0); |
| 140 | void | 133 | bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0); |
| 141 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 134 | bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &r[4]); |
| 142 | { | 135 | |
| 143 | BN_ULONG c1, c2, c3; | 136 | bn_mulw_addtw(a[2], b[3], 0, c2, c1, &c2, &c1, &c0); |
| 144 | 137 | bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &r[5]); | |
| 145 | c1 = 0; | 138 | |
| 146 | c2 = 0; | 139 | bn_mulw_addtw(a[3], b[3], 0, c2, c1, &c2, &r[7], &r[6]); |
| 147 | c3 = 0; | ||
| 148 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
| 149 | r[0] = c1; | ||
| 150 | c1 = 0; | ||
| 151 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
| 152 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
| 153 | r[1] = c2; | ||
| 154 | c2 = 0; | ||
| 155 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
| 156 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
| 157 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
| 158 | r[2] = c3; | ||
| 159 | c3 = 0; | ||
| 160 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
| 161 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
| 162 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
| 163 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
| 164 | r[3] = c1; | ||
| 165 | c1 = 0; | ||
| 166 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
| 167 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
| 168 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
| 169 | r[4] = c2; | ||
| 170 | c2 = 0; | ||
| 171 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
| 172 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
| 173 | r[5] = c3; | ||
| 174 | c3 = 0; | ||
| 175 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
| 176 | r[6] = c1; | ||
| 177 | r[7] = c2; | ||
| 178 | } | 140 | } |
| 179 | #endif | 141 | #endif |
| 180 | 142 | ||
| 143 | /* | ||
| 144 | * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication | ||
| 145 | * (https://everything2.com/title/Comba+multiplication), where a and b are both | ||
| 146 | * eight word arrays, producing a 16 word array result. | ||
| 147 | */ | ||
| 181 | #ifndef HAVE_BN_MUL_COMBA8 | 148 | #ifndef HAVE_BN_MUL_COMBA8 |
| 182 | void | 149 | void |
| 183 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 150 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 184 | { | 151 | { |
| 185 | BN_ULONG c1, c2, c3; | 152 | BN_ULONG c0, c1, c2; |
| 186 | 153 | ||
| 187 | c1 = 0; | 154 | bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]); |
| 188 | c2 = 0; | 155 | |
| 189 | c3 = 0; | 156 | bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0); |
| 190 | mul_add_c(a[0], b[0], c1, c2, c3); | 157 | bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]); |
| 191 | r[0] = c1; | 158 | |
| 192 | c1 = 0; | 159 | bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0); |
| 193 | mul_add_c(a[0], b[1], c2, c3, c1); | 160 | bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0); |
| 194 | mul_add_c(a[1], b[0], c2, c3, c1); | 161 | bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]); |
| 195 | r[1] = c2; | 162 | |
| 196 | c2 = 0; | 163 | bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0); |
| 197 | mul_add_c(a[2], b[0], c3, c1, c2); | 164 | bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0); |
| 198 | mul_add_c(a[1], b[1], c3, c1, c2); | 165 | bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0); |
| 199 | mul_add_c(a[0], b[2], c3, c1, c2); | 166 | bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]); |
| 200 | r[2] = c3; | 167 | |
| 201 | c3 = 0; | 168 | bn_mulw_addtw(a[4], b[0], 0, c2, c1, &c2, &c1, &c0); |
| 202 | mul_add_c(a[0], b[3], c1, c2, c3); | 169 | bn_mulw_addtw(a[3], b[1], c2, c1, c0, &c2, &c1, &c0); |
| 203 | mul_add_c(a[1], b[2], c1, c2, c3); | 170 | bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0); |
| 204 | mul_add_c(a[2], b[1], c1, c2, c3); | 171 | bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &c0); |
| 205 | mul_add_c(a[3], b[0], c1, c2, c3); | 172 | bn_mulw_addtw(a[0], b[4], c2, c1, c0, &c2, &c1, &r[4]); |
| 206 | r[3] = c1; | 173 | |
| 207 | c1 = 0; | 174 | bn_mulw_addtw(a[0], b[5], 0, c2, c1, &c2, &c1, &c0); |
| 208 | mul_add_c(a[4], b[0], c2, c3, c1); | 175 | bn_mulw_addtw(a[1], b[4], c2, c1, c0, &c2, &c1, &c0); |
| 209 | mul_add_c(a[3], b[1], c2, c3, c1); | 176 | bn_mulw_addtw(a[2], b[3], c2, c1, c0, &c2, &c1, &c0); |
| 210 | mul_add_c(a[2], b[2], c2, c3, c1); | 177 | bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &c0); |
| 211 | mul_add_c(a[1], b[3], c2, c3, c1); | 178 | bn_mulw_addtw(a[4], b[1], c2, c1, c0, &c2, &c1, &c0); |
| 212 | mul_add_c(a[0], b[4], c2, c3, c1); | 179 | bn_mulw_addtw(a[5], b[0], c2, c1, c0, &c2, &c1, &r[5]); |
| 213 | r[4] = c2; | 180 | |
| 214 | c2 = 0; | 181 | bn_mulw_addtw(a[6], b[0], 0, c2, c1, &c2, &c1, &c0); |
| 215 | mul_add_c(a[0], b[5], c3, c1, c2); | 182 | bn_mulw_addtw(a[5], b[1], c2, c1, c0, &c2, &c1, &c0); |
| 216 | mul_add_c(a[1], b[4], c3, c1, c2); | 183 | bn_mulw_addtw(a[4], b[2], c2, c1, c0, &c2, &c1, &c0); |
| 217 | mul_add_c(a[2], b[3], c3, c1, c2); | 184 | bn_mulw_addtw(a[3], b[3], c2, c1, c0, &c2, &c1, &c0); |
| 218 | mul_add_c(a[3], b[2], c3, c1, c2); | 185 | bn_mulw_addtw(a[2], b[4], c2, c1, c0, &c2, &c1, &c0); |
| 219 | mul_add_c(a[4], b[1], c3, c1, c2); | 186 | bn_mulw_addtw(a[1], b[5], c2, c1, c0, &c2, &c1, &c0); |
| 220 | mul_add_c(a[5], b[0], c3, c1, c2); | 187 | bn_mulw_addtw(a[0], b[6], c2, c1, c0, &c2, &c1, &r[6]); |
| 221 | r[5] = c3; | 188 | |
| 222 | c3 = 0; | 189 | bn_mulw_addtw(a[0], b[7], 0, c2, c1, &c2, &c1, &c0); |
| 223 | mul_add_c(a[6], b[0], c1, c2, c3); | 190 | bn_mulw_addtw(a[1], b[6], c2, c1, c0, &c2, &c1, &c0); |
| 224 | mul_add_c(a[5], b[1], c1, c2, c3); | 191 | bn_mulw_addtw(a[2], b[5], c2, c1, c0, &c2, &c1, &c0); |
| 225 | mul_add_c(a[4], b[2], c1, c2, c3); | 192 | bn_mulw_addtw(a[3], b[4], c2, c1, c0, &c2, &c1, &c0); |
| 226 | mul_add_c(a[3], b[3], c1, c2, c3); | 193 | bn_mulw_addtw(a[4], b[3], c2, c1, c0, &c2, &c1, &c0); |
| 227 | mul_add_c(a[2], b[4], c1, c2, c3); | 194 | bn_mulw_addtw(a[5], b[2], c2, c1, c0, &c2, &c1, &c0); |
| 228 | mul_add_c(a[1], b[5], c1, c2, c3); | 195 | bn_mulw_addtw(a[6], b[1], c2, c1, c0, &c2, &c1, &c0); |
| 229 | mul_add_c(a[0], b[6], c1, c2, c3); | 196 | bn_mulw_addtw(a[7], b[0], c2, c1, c0, &c2, &c1, &r[7]); |
| 230 | r[6] = c1; | 197 | |
| 231 | c1 = 0; | 198 | bn_mulw_addtw(a[7], b[1], 0, c2, c1, &c2, &c1, &c0); |
| 232 | mul_add_c(a[0], b[7], c2, c3, c1); | 199 | bn_mulw_addtw(a[6], b[2], c2, c1, c0, &c2, &c1, &c0); |
| 233 | mul_add_c(a[1], b[6], c2, c3, c1); | 200 | bn_mulw_addtw(a[5], b[3], c2, c1, c0, &c2, &c1, &c0); |
| 234 | mul_add_c(a[2], b[5], c2, c3, c1); | 201 | bn_mulw_addtw(a[4], b[4], c2, c1, c0, &c2, &c1, &c0); |
| 235 | mul_add_c(a[3], b[4], c2, c3, c1); | 202 | bn_mulw_addtw(a[3], b[5], c2, c1, c0, &c2, &c1, &c0); |
| 236 | mul_add_c(a[4], b[3], c2, c3, c1); | 203 | bn_mulw_addtw(a[2], b[6], c2, c1, c0, &c2, &c1, &c0); |
| 237 | mul_add_c(a[5], b[2], c2, c3, c1); | 204 | bn_mulw_addtw(a[1], b[7], c2, c1, c0, &c2, &c1, &r[8]); |
| 238 | mul_add_c(a[6], b[1], c2, c3, c1); | 205 | |
| 239 | mul_add_c(a[7], b[0], c2, c3, c1); | 206 | bn_mulw_addtw(a[2], b[7], 0, c2, c1, &c2, &c1, &c0); |
| 240 | r[7] = c2; | 207 | bn_mulw_addtw(a[3], b[6], c2, c1, c0, &c2, &c1, &c0); |
| 241 | c2 = 0; | 208 | bn_mulw_addtw(a[4], b[5], c2, c1, c0, &c2, &c1, &c0); |
| 242 | mul_add_c(a[7], b[1], c3, c1, c2); | 209 | bn_mulw_addtw(a[5], b[4], c2, c1, c0, &c2, &c1, &c0); |
| 243 | mul_add_c(a[6], b[2], c3, c1, c2); | 210 | bn_mulw_addtw(a[6], b[3], c2, c1, c0, &c2, &c1, &c0); |
| 244 | mul_add_c(a[5], b[3], c3, c1, c2); | 211 | bn_mulw_addtw(a[7], b[2], c2, c1, c0, &c2, &c1, &r[9]); |
| 245 | mul_add_c(a[4], b[4], c3, c1, c2); | 212 | |
| 246 | mul_add_c(a[3], b[5], c3, c1, c2); | 213 | bn_mulw_addtw(a[7], b[3], 0, c2, c1, &c2, &c1, &c0); |
| 247 | mul_add_c(a[2], b[6], c3, c1, c2); | 214 | bn_mulw_addtw(a[6], b[4], c2, c1, c0, &c2, &c1, &c0); |
| 248 | mul_add_c(a[1], b[7], c3, c1, c2); | 215 | bn_mulw_addtw(a[5], b[5], c2, c1, c0, &c2, &c1, &c0); |
| 249 | r[8] = c3; | 216 | bn_mulw_addtw(a[4], b[6], c2, c1, c0, &c2, &c1, &c0); |
| 250 | c3 = 0; | 217 | bn_mulw_addtw(a[3], b[7], c2, c1, c0, &c2, &c1, &r[10]); |
| 251 | mul_add_c(a[2], b[7], c1, c2, c3); | 218 | |
| 252 | mul_add_c(a[3], b[6], c1, c2, c3); | 219 | bn_mulw_addtw(a[4], b[7], 0, c2, c1, &c2, &c1, &c0); |
| 253 | mul_add_c(a[4], b[5], c1, c2, c3); | 220 | bn_mulw_addtw(a[5], b[6], c2, c1, c0, &c2, &c1, &c0); |
| 254 | mul_add_c(a[5], b[4], c1, c2, c3); | 221 | bn_mulw_addtw(a[6], b[5], c2, c1, c0, &c2, &c1, &c0); |
| 255 | mul_add_c(a[6], b[3], c1, c2, c3); | 222 | bn_mulw_addtw(a[7], b[4], c2, c1, c0, &c2, &c1, &r[11]); |
| 256 | mul_add_c(a[7], b[2], c1, c2, c3); | 223 | |
| 257 | r[9] = c1; | 224 | bn_mulw_addtw(a[7], b[5], 0, c2, c1, &c2, &c1, &c0); |
| 258 | c1 = 0; | 225 | bn_mulw_addtw(a[6], b[6], c2, c1, c0, &c2, &c1, &c0); |
| 259 | mul_add_c(a[7], b[3], c2, c3, c1); | 226 | bn_mulw_addtw(a[5], b[7], c2, c1, c0, &c2, &c1, &r[12]); |
| 260 | mul_add_c(a[6], b[4], c2, c3, c1); | 227 | |
| 261 | mul_add_c(a[5], b[5], c2, c3, c1); | 228 | bn_mulw_addtw(a[6], b[7], 0, c2, c1, &c2, &c1, &c0); |
| 262 | mul_add_c(a[4], b[6], c2, c3, c1); | 229 | bn_mulw_addtw(a[7], b[6], c2, c1, c0, &c2, &c1, &r[13]); |
| 263 | mul_add_c(a[3], b[7], c2, c3, c1); | 230 | |
| 264 | r[10] = c2; | 231 | bn_mulw_addtw(a[7], b[7], 0, c2, c1, &c2, &r[15], &r[14]); |
| 265 | c2 = 0; | ||
| 266 | mul_add_c(a[4], b[7], c3, c1, c2); | ||
| 267 | mul_add_c(a[5], b[6], c3, c1, c2); | ||
| 268 | mul_add_c(a[6], b[5], c3, c1, c2); | ||
| 269 | mul_add_c(a[7], b[4], c3, c1, c2); | ||
| 270 | r[11] = c3; | ||
| 271 | c3 = 0; | ||
| 272 | mul_add_c(a[7], b[5], c1, c2, c3); | ||
| 273 | mul_add_c(a[6], b[6], c1, c2, c3); | ||
| 274 | mul_add_c(a[5], b[7], c1, c2, c3); | ||
| 275 | r[12] = c1; | ||
| 276 | c1 = 0; | ||
| 277 | mul_add_c(a[6], b[7], c2, c3, c1); | ||
| 278 | mul_add_c(a[7], b[6], c2, c3, c1); | ||
| 279 | r[13] = c2; | ||
| 280 | c2 = 0; | ||
| 281 | mul_add_c(a[7], b[7], c3, c1, c2); | ||
| 282 | r[14] = c3; | ||
| 283 | r[15] = c1; | ||
| 284 | } | 232 | } |
| 285 | #endif | 233 | #endif |
| 286 | 234 | ||
| 235 | /* | ||
| 236 | * bn_mul_words() computes (carry:r[i]) = a[i] * w + carry, where a is an array | ||
| 237 | * of words and w is a single word. This should really be called bn_mulw_words() | ||
| 238 | * since only one input is an array. This is used as a step in the multiplication | ||
| 239 | * of word arrays. | ||
| 240 | */ | ||
| 287 | #ifndef HAVE_BN_MUL_WORDS | 241 | #ifndef HAVE_BN_MUL_WORDS |
| 288 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) | ||
| 289 | |||
| 290 | BN_ULONG | 242 | BN_ULONG |
| 291 | bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | 243 | bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) |
| 292 | { | ||
| 293 | BN_ULONG c1 = 0; | ||
| 294 | |||
| 295 | assert(num >= 0); | ||
| 296 | if (num <= 0) | ||
| 297 | return (c1); | ||
| 298 | |||
| 299 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
| 300 | while (num & ~3) { | ||
| 301 | mul(rp[0], ap[0], w, c1); | ||
| 302 | mul(rp[1], ap[1], w, c1); | ||
| 303 | mul(rp[2], ap[2], w, c1); | ||
| 304 | mul(rp[3], ap[3], w, c1); | ||
| 305 | ap += 4; | ||
| 306 | rp += 4; | ||
| 307 | num -= 4; | ||
| 308 | } | ||
| 309 | #endif | ||
| 310 | while (num) { | ||
| 311 | mul(rp[0], ap[0], w, c1); | ||
| 312 | ap++; | ||
| 313 | rp++; | ||
| 314 | num--; | ||
| 315 | } | ||
| 316 | return (c1); | ||
| 317 | } | ||
| 318 | #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ | ||
| 319 | |||
| 320 | BN_ULONG | ||
| 321 | bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | ||
| 322 | { | 244 | { |
| 323 | BN_ULONG carry = 0; | 245 | BN_ULONG carry = 0; |
| 324 | BN_ULONG bl, bh; | ||
| 325 | 246 | ||
| 326 | assert(num >= 0); | 247 | assert(num >= 0); |
| 327 | if (num <= 0) | 248 | if (num <= 0) |
| 328 | return ((BN_ULONG)0); | 249 | return 0; |
| 329 | |||
| 330 | bl = LBITS(w); | ||
| 331 | bh = HBITS(w); | ||
| 332 | 250 | ||
| 333 | #ifndef OPENSSL_SMALL_FOOTPRINT | 251 | #ifndef OPENSSL_SMALL_FOOTPRINT |
| 334 | while (num & ~3) { | 252 | while (num & ~3) { |
| 335 | mul(rp[0], ap[0], bl, bh, carry); | 253 | bn_mulw_addw(a[0], w, carry, &carry, &r[0]); |
| 336 | mul(rp[1], ap[1], bl, bh, carry); | 254 | bn_mulw_addw(a[1], w, carry, &carry, &r[1]); |
| 337 | mul(rp[2], ap[2], bl, bh, carry); | 255 | bn_mulw_addw(a[2], w, carry, &carry, &r[2]); |
| 338 | mul(rp[3], ap[3], bl, bh, carry); | 256 | bn_mulw_addw(a[3], w, carry, &carry, &r[3]); |
| 339 | ap += 4; | 257 | a += 4; |
| 340 | rp += 4; | 258 | r += 4; |
| 341 | num -= 4; | 259 | num -= 4; |
| 342 | } | 260 | } |
| 343 | #endif | 261 | #endif |
| 344 | while (num) { | 262 | while (num) { |
| 345 | mul(rp[0], ap[0], bl, bh, carry); | 263 | bn_mulw_addw(a[0], w, carry, &carry, &r[0]); |
| 346 | ap++; | 264 | a++; |
| 347 | rp++; | 265 | r++; |
| 348 | num--; | 266 | num--; |
| 349 | } | 267 | } |
| 350 | return (carry); | 268 | return carry; |
| 351 | } | 269 | } |
| 352 | #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ | ||
| 353 | #endif | 270 | #endif |
| 354 | 271 | ||
| 355 | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) | 272 | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) |
