diff options
| author | jsing <> | 2023-06-12 16:17:24 +0000 |
|---|---|---|
| committer | jsing <> | 2023-06-12 16:17:24 +0000 |
| commit | e65682b76bcfaec43a218a52db723a341bec5b90 (patch) | |
| tree | b4e21a96adfc49bf3a4624865e60a6430dcf75ab /src/lib/libc | |
| parent | 3fc29f2a9986e70c00a72c515bd13f00f6157fc0 (diff) | |
| download | openbsd-e65682b76bcfaec43a218a52db723a341bec5b90.tar.gz openbsd-e65682b76bcfaec43a218a52db723a341bec5b90.tar.bz2 openbsd-e65682b76bcfaec43a218a52db723a341bec5b90.zip | |
Provide and use various quad word primitives.
This includes bn_qwaddqw(), bn_qwsubqw(), bn_qwmulw_addw() and
bn_qwmulw_addqw_addw(). These can typically be optimised on architectures
that have a reasonable number of general purpose registers.
ok tb@
Diffstat (limited to '')
| -rw-r--r-- | src/lib/libcrypto/bn/bn_add.c | 18 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_internal.h | 110 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_mul.c | 19 |
3 files changed, 120 insertions, 27 deletions
diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c index 92489b7da3..36f160ab5f 100644 --- a/src/lib/libcrypto/bn/bn_add.c +++ b/src/lib/libcrypto/bn/bn_add.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_add.c,v 1.24 2023/02/22 05:46:37 jsing Exp $ */ | 1 | /* $OpenBSD: bn_add.c,v 1.25 2023/06/12 16:17:24 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -80,18 +80,14 @@ bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 80 | if (n <= 0) | 80 | if (n <= 0) |
| 81 | return 0; | 81 | return 0; |
| 82 | 82 | ||
| 83 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
| 84 | while (n & ~3) { | 83 | while (n & ~3) { |
| 85 | bn_addw_addw(a[0], b[0], carry, &carry, &r[0]); | 84 | bn_qwaddqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0], |
| 86 | bn_addw_addw(a[1], b[1], carry, &carry, &r[1]); | 85 | carry, &carry, &r[3], &r[2], &r[1], &r[0]); |
| 87 | bn_addw_addw(a[2], b[2], carry, &carry, &r[2]); | ||
| 88 | bn_addw_addw(a[3], b[3], carry, &carry, &r[3]); | ||
| 89 | a += 4; | 86 | a += 4; |
| 90 | b += 4; | 87 | b += 4; |
| 91 | r += 4; | 88 | r += 4; |
| 92 | n -= 4; | 89 | n -= 4; |
| 93 | } | 90 | } |
| 94 | #endif | ||
| 95 | while (n) { | 91 | while (n) { |
| 96 | bn_addw_addw(a[0], b[0], carry, &carry, &r[0]); | 92 | bn_addw_addw(a[0], b[0], carry, &carry, &r[0]); |
| 97 | a++; | 93 | a++; |
| @@ -165,18 +161,14 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 165 | if (n <= 0) | 161 | if (n <= 0) |
| 166 | return 0; | 162 | return 0; |
| 167 | 163 | ||
| 168 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
| 169 | while (n & ~3) { | 164 | while (n & ~3) { |
| 170 | bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]); | 165 | bn_qwsubqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0], |
| 171 | bn_subw_subw(a[1], b[1], borrow, &borrow, &r[1]); | 166 | borrow, &borrow, &r[3], &r[2], &r[1], &r[0]); |
| 172 | bn_subw_subw(a[2], b[2], borrow, &borrow, &r[2]); | ||
| 173 | bn_subw_subw(a[3], b[3], borrow, &borrow, &r[3]); | ||
| 174 | a += 4; | 167 | a += 4; |
| 175 | b += 4; | 168 | b += 4; |
| 176 | r += 4; | 169 | r += 4; |
| 177 | n -= 4; | 170 | n -= 4; |
| 178 | } | 171 | } |
| 179 | #endif | ||
| 180 | while (n) { | 172 | while (n) { |
| 181 | bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]); | 173 | bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]); |
| 182 | a++; | 174 | a++; |
diff --git a/src/lib/libcrypto/bn/bn_internal.h b/src/lib/libcrypto/bn/bn_internal.h index 8a729b8e44..5f86e21330 100644 --- a/src/lib/libcrypto/bn/bn_internal.h +++ b/src/lib/libcrypto/bn/bn_internal.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_internal.h,v 1.11 2023/03/07 09:35:55 jsing Exp $ */ | 1 | /* $OpenBSD: bn_internal.h,v 1.12 2023/06/12 16:17:24 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -123,6 +123,33 @@ bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1, | |||
| 123 | #endif | 123 | #endif |
| 124 | 124 | ||
| 125 | /* | 125 | /* |
| 126 | * bn_qwaddqw() computes | ||
| 127 | * (carry:r3:r2:r1:r0) = (a3:a2:a1:a0) + (b3:b2:b1:b0) + carry, where a is a quad word, | ||
| 128 | * b is a quad word, and carry is a single word with value 0 or 1, producing a four | ||
| 129 | * word result and carry. | ||
| 130 | */ | ||
| 131 | #ifndef HAVE_BN_QWADDQW | ||
| 132 | static inline void | ||
| 133 | bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3, | ||
| 134 | BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry, | ||
| 135 | BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0) | ||
| 136 | { | ||
| 137 | BN_ULONG r3, r2, r1, r0; | ||
| 138 | |||
| 139 | bn_addw_addw(a0, b0, carry, &carry, &r0); | ||
| 140 | bn_addw_addw(a1, b1, carry, &carry, &r1); | ||
| 141 | bn_addw_addw(a2, b2, carry, &carry, &r2); | ||
| 142 | bn_addw_addw(a3, b3, carry, &carry, &r3); | ||
| 143 | |||
| 144 | *out_carry = carry; | ||
| 145 | *out_r3 = r3; | ||
| 146 | *out_r2 = r2; | ||
| 147 | *out_r1 = r1; | ||
| 148 | *out_r0 = r0; | ||
| 149 | } | ||
| 150 | #endif | ||
| 151 | |||
| 152 | /* | ||
| 126 | * bn_subw() computes r0 = a - b, where both inputs are single words, | 153 | * bn_subw() computes r0 = a - b, where both inputs are single words, |
| 127 | * producing a single word result and borrow. | 154 | * producing a single word result and borrow. |
| 128 | */ | 155 | */ |
| @@ -160,6 +187,33 @@ bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow, | |||
| 160 | #endif | 187 | #endif |
| 161 | 188 | ||
| 162 | /* | 189 | /* |
| 190 | * bn_qwsubqw() computes | ||
| 191 | * (r3:r2:r1:r0) = (a3:a2:a1:a0) - (b3:b2:b1:b0) - borrow, where a is a quad word, | ||
| 192 | * b is a quad word, and borrow is a single word with value 0 or 1, producing a | ||
| 193 | * four word result and borrow. | ||
| 194 | */ | ||
| 195 | #ifndef HAVE_BN_QWSUBQW | ||
| 196 | static inline void | ||
| 197 | bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3, | ||
| 198 | BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow, | ||
| 199 | BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0) | ||
| 200 | { | ||
| 201 | BN_ULONG r3, r2, r1, r0; | ||
| 202 | |||
| 203 | bn_subw_subw(a0, b0, borrow, &borrow, &r0); | ||
| 204 | bn_subw_subw(a1, b1, borrow, &borrow, &r1); | ||
| 205 | bn_subw_subw(a2, b2, borrow, &borrow, &r2); | ||
| 206 | bn_subw_subw(a3, b3, borrow, &borrow, &r3); | ||
| 207 | |||
| 208 | *out_borrow = borrow; | ||
| 209 | *out_r3 = r3; | ||
| 210 | *out_r2 = r2; | ||
| 211 | *out_r1 = r1; | ||
| 212 | *out_r0 = r0; | ||
| 213 | } | ||
| 214 | #endif | ||
| 215 | |||
| 216 | /* | ||
| 163 | * bn_mulw() computes (r1:r0) = a * b, where both inputs are single words, | 217 | * bn_mulw() computes (r1:r0) = a * b, where both inputs are single words, |
| 164 | * producing a double word result. | 218 | * producing a double word result. |
| 165 | */ | 219 | */ |
| @@ -387,4 +441,58 @@ bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0 | |||
| 387 | } | 441 | } |
| 388 | #endif | 442 | #endif |
| 389 | 443 | ||
| 444 | /* | ||
| 445 | * bn_qwmulw_addw() computes (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + c, where a | ||
| 446 | * is a quad word, b is a single word and c is a single word, producing a five | ||
| 447 | * word result. | ||
| 448 | */ | ||
| 449 | #ifndef HAVE_BN_QWMULW_ADDW | ||
| 450 | static inline void | ||
| 451 | bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b, | ||
| 452 | BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, | ||
| 453 | BN_ULONG *out_r1, BN_ULONG *out_r0) | ||
| 454 | { | ||
| 455 | BN_ULONG r3, r2, r1, r0; | ||
| 456 | |||
| 457 | bn_mulw_addw(a0, b, c, &c, &r0); | ||
| 458 | bn_mulw_addw(a1, b, c, &c, &r1); | ||
| 459 | bn_mulw_addw(a2, b, c, &c, &r2); | ||
| 460 | bn_mulw_addw(a3, b, c, &c, &r3); | ||
| 461 | |||
| 462 | *out_r4 = c; | ||
| 463 | *out_r3 = r3; | ||
| 464 | *out_r2 = r2; | ||
| 465 | *out_r1 = r1; | ||
| 466 | *out_r0 = r0; | ||
| 467 | } | ||
| 468 | #endif | ||
| 469 | |||
| 470 | /* | ||
| 471 | * bn_qwmulw_addqw_addw() computes | ||
| 472 | * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + (c3:c2:c1:c0) + d, where a | ||
| 473 | * is a quad word, b is a single word, c is a quad word, and d is a single word, | ||
| 474 | * producing a five word result. | ||
| 475 | */ | ||
| 476 | #ifndef HAVE_BN_QWMULW_ADDQW_ADDW | ||
| 477 | static inline void | ||
| 478 | bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, | ||
| 479 | BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d, | ||
| 480 | BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, | ||
| 481 | BN_ULONG *out_r0) | ||
| 482 | { | ||
| 483 | BN_ULONG r3, r2, r1, r0; | ||
| 484 | |||
| 485 | bn_mulw_addw_addw(a0, b, c0, d, &d, &r0); | ||
| 486 | bn_mulw_addw_addw(a1, b, c1, d, &d, &r1); | ||
| 487 | bn_mulw_addw_addw(a2, b, c2, d, &d, &r2); | ||
| 488 | bn_mulw_addw_addw(a3, b, c3, d, &d, &r3); | ||
| 489 | |||
| 490 | *out_r4 = d; | ||
| 491 | *out_r3 = r3; | ||
| 492 | *out_r2 = r2; | ||
| 493 | *out_r1 = r1; | ||
| 494 | *out_r0 = r0; | ||
| 495 | } | ||
| 496 | #endif | ||
| 497 | |||
| 390 | #endif | 498 | #endif |
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index 118e8cddc5..65088cc5c4 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_mul.c,v 1.37 2023/04/19 10:51:22 jsing Exp $ */ | 1 | /* $OpenBSD: bn_mul.c,v 1.38 2023/06/12 16:17:24 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -210,17 +210,13 @@ bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) | |||
| 210 | if (num <= 0) | 210 | if (num <= 0) |
| 211 | return 0; | 211 | return 0; |
| 212 | 212 | ||
| 213 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
| 214 | while (num & ~3) { | 213 | while (num & ~3) { |
| 215 | bn_mulw_addw(a[0], w, carry, &carry, &r[0]); | 214 | bn_qwmulw_addw(a[3], a[2], a[1], a[0], w, carry, &carry, |
| 216 | bn_mulw_addw(a[1], w, carry, &carry, &r[1]); | 215 | &r[3], &r[2], &r[1], &r[0]); |
| 217 | bn_mulw_addw(a[2], w, carry, &carry, &r[2]); | ||
| 218 | bn_mulw_addw(a[3], w, carry, &carry, &r[3]); | ||
| 219 | a += 4; | 216 | a += 4; |
| 220 | r += 4; | 217 | r += 4; |
| 221 | num -= 4; | 218 | num -= 4; |
| 222 | } | 219 | } |
| 223 | #endif | ||
| 224 | while (num) { | 220 | while (num) { |
| 225 | bn_mulw_addw(a[0], w, carry, &carry, &r[0]); | 221 | bn_mulw_addw(a[0], w, carry, &carry, &r[0]); |
| 226 | a++; | 222 | a++; |
| @@ -247,17 +243,14 @@ bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) | |||
| 247 | if (num <= 0) | 243 | if (num <= 0) |
| 248 | return 0; | 244 | return 0; |
| 249 | 245 | ||
| 250 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
| 251 | while (num & ~3) { | 246 | while (num & ~3) { |
| 252 | bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); | 247 | bn_qwmulw_addqw_addw(a[3], a[2], a[1], a[0], w, |
| 253 | bn_mulw_addw_addw(a[1], w, r[1], carry, &carry, &r[1]); | 248 | r[3], r[2], r[1], r[0], carry, &carry, |
| 254 | bn_mulw_addw_addw(a[2], w, r[2], carry, &carry, &r[2]); | 249 | &r[3], &r[2], &r[1], &r[0]); |
| 255 | bn_mulw_addw_addw(a[3], w, r[3], carry, &carry, &r[3]); | ||
| 256 | a += 4; | 250 | a += 4; |
| 257 | r += 4; | 251 | r += 4; |
| 258 | num -= 4; | 252 | num -= 4; |
| 259 | } | 253 | } |
| 260 | #endif | ||
| 261 | while (num) { | 254 | while (num) { |
| 262 | bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); | 255 | bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); |
| 263 | a++; | 256 | a++; |
