diff options
| author | jsing <> | 2023-02-17 05:13:34 +0000 |
|---|---|---|
| committer | jsing <> | 2023-02-17 05:13:34 +0000 |
| commit | e251eb23d4d05166e78527447e36f14b8598ca5f (patch) | |
| tree | b83e94b45b8911aac35bd66431cc5c5c9cd89015 /src | |
| parent | 02f91f3325f965a313f0cffe5053a084d05a84ea (diff) | |
| download | openbsd-e251eb23d4d05166e78527447e36f14b8598ca5f.tar.gz openbsd-e251eb23d4d05166e78527447e36f14b8598ca5f.tar.bz2 openbsd-e251eb23d4d05166e78527447e36f14b8598ca5f.zip | |
Reimplement bn_sqr_comba{4,8}().
Use bignum primitives rather than the current mess of macros.The sqr_add_c
macro gets replaced with bn_mulw_addtw(), while the sqr_add_c2 macro gets
replaced with bn_mul2_mulw_addtw().
The variables in the comba functions have also been reordered, so that the
patterns are easier to understand - the compiler can take care of
optimising the inputs and outputs to avoid register moves.
ok tb@
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib/libcrypto/bn/bn_internal.h | 30 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_sqr.c | 182 |
2 files changed, 110 insertions, 102 deletions
diff --git a/src/lib/libcrypto/bn/bn_internal.h b/src/lib/libcrypto/bn/bn_internal.h index acee2b4020..859f00d05d 100644 --- a/src/lib/libcrypto/bn/bn_internal.h +++ b/src/lib/libcrypto/bn/bn_internal.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_internal.h,v 1.8 2023/02/16 10:58:06 jsing Exp $ */ | 1 | /* $OpenBSD: bn_internal.h,v 1.9 2023/02/17 05:13:34 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -361,4 +361,32 @@ bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, | |||
| 361 | } | 361 | } |
| 362 | #endif | 362 | #endif |
| 363 | 363 | ||
| 364 | /* | ||
| 365 | * bn_mulw_addtw() computes (r2:r1:r0) = 2 * a * b + (c2:c1:c0), where a and b | ||
| 366 | * are single words and (c2:c1:c0) is a triple word, producing a triple word | ||
| 367 | * result. The caller must ensure that the inputs provided do not result in c2 | ||
| 368 | * overflowing. | ||
| 369 | */ | ||
| 370 | #ifndef HAVE_BN_MUL2_MULW_ADDTW | ||
| 371 | static inline void | ||
| 372 | bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, | ||
| 373 | BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0) | ||
| 374 | { | ||
| 375 | BN_ULONG r2, r1, r0, x1, x0; | ||
| 376 | BN_ULONG carry; | ||
| 377 | |||
| 378 | bn_mulw(a, b, &x1, &x0); | ||
| 379 | bn_addw(c0, x0, &carry, &r0); | ||
| 380 | bn_addw(c1, x1 + carry, &r2, &r1); | ||
| 381 | bn_addw(c2, r2, &carry, &r2); | ||
| 382 | bn_addw(r0, x0, &carry, &r0); | ||
| 383 | bn_addw(r1, x1 + carry, &carry, &r1); | ||
| 384 | r2 += carry; | ||
| 385 | |||
| 386 | *out_r2 = r2; | ||
| 387 | *out_r1 = r1; | ||
| 388 | *out_r0 = r0; | ||
| 389 | } | ||
| 390 | #endif | ||
| 391 | |||
| 364 | #endif | 392 | #endif |
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c index f649b9bce8..6e784541bd 100644 --- a/src/lib/libcrypto/bn/bn_sqr.c +++ b/src/lib/libcrypto/bn/bn_sqr.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_sqr.c,v 1.26 2023/02/16 10:41:03 jsing Exp $ */ | 1 | /* $OpenBSD: bn_sqr.c,v 1.27 2023/02/17 05:13:34 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -66,117 +66,97 @@ | |||
| 66 | 66 | ||
| 67 | int bn_sqr(BIGNUM *r, const BIGNUM *a, int max, BN_CTX *ctx); | 67 | int bn_sqr(BIGNUM *r, const BIGNUM *a, int max, BN_CTX *ctx); |
| 68 | 68 | ||
| 69 | /* | ||
| 70 | * bn_sqr_comba4() computes r[] = a[] * a[] using Comba multiplication | ||
| 71 | * (https://everything2.com/title/Comba+multiplication), where a is a | ||
| 72 | * four word array, producing an eight word array result. | ||
| 73 | */ | ||
| 69 | #ifndef HAVE_BN_SQR_COMBA4 | 74 | #ifndef HAVE_BN_SQR_COMBA4 |
| 70 | void | 75 | void |
| 71 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | 76 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
| 72 | { | 77 | { |
| 73 | BN_ULONG c1, c2, c3; | 78 | BN_ULONG c2, c1, c0; |
| 74 | 79 | ||
| 75 | c1 = 0; | 80 | bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]); |
| 76 | c2 = 0; | 81 | |
| 77 | c3 = 0; | 82 | bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]); |
| 78 | sqr_add_c(a, 0, c1, c2, c3); | 83 | |
| 79 | r[0] = c1; | 84 | bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0); |
| 80 | c1 = 0; | 85 | bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]); |
| 81 | sqr_add_c2(a, 1, 0, c2, c3, c1); | 86 | |
| 82 | r[1] = c2; | 87 | bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0); |
| 83 | c2 = 0; | 88 | bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]); |
| 84 | sqr_add_c(a, 1, c3, c1, c2); | 89 | |
| 85 | sqr_add_c2(a, 2, 0, c3, c1, c2); | 90 | bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0); |
| 86 | r[2] = c3; | 91 | bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &r[4]); |
| 87 | c3 = 0; | 92 | |
| 88 | sqr_add_c2(a, 3, 0, c1, c2, c3); | 93 | bn_mul2_mulw_addtw(a[3], a[2], 0, c2, c1, &c2, &c1, &r[5]); |
| 89 | sqr_add_c2(a, 2, 1, c1, c2, c3); | 94 | |
| 90 | r[3] = c1; | 95 | bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &r[7], &r[6]); |
| 91 | c1 = 0; | ||
| 92 | sqr_add_c(a, 2, c2, c3, c1); | ||
| 93 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
| 94 | r[4] = c2; | ||
| 95 | c2 = 0; | ||
| 96 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
| 97 | r[5] = c3; | ||
| 98 | c3 = 0; | ||
| 99 | sqr_add_c(a, 3, c1, c2, c3); | ||
| 100 | r[6] = c1; | ||
| 101 | r[7] = c2; | ||
| 102 | } | 96 | } |
| 103 | #endif | 97 | #endif |
| 104 | 98 | ||
| 99 | /* | ||
| 100 | * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication | ||
| 101 | * (https://everything2.com/title/Comba+multiplication), where a is an | ||
| 102 | * eight word array, producing an 16 word array result. | ||
| 103 | */ | ||
| 105 | #ifndef HAVE_BN_SQR_COMBA8 | 104 | #ifndef HAVE_BN_SQR_COMBA8 |
| 106 | void | 105 | void |
| 107 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | 106 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
| 108 | { | 107 | { |
| 109 | BN_ULONG c1, c2, c3; | 108 | BN_ULONG c2, c1, c0; |
| 110 | 109 | ||
| 111 | c1 = 0; | 110 | bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]); |
| 112 | c2 = 0; | 111 | |
| 113 | c3 = 0; | 112 | bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]); |
| 114 | sqr_add_c(a, 0, c1, c2, c3); | 113 | |
| 115 | r[0] = c1; | 114 | bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0); |
| 116 | c1 = 0; | 115 | bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]); |
| 117 | sqr_add_c2(a, 1, 0, c2, c3, c1); | 116 | |
| 118 | r[1] = c2; | 117 | bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0); |
| 119 | c2 = 0; | 118 | bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]); |
| 120 | sqr_add_c(a, 1, c3, c1, c2); | 119 | |
| 121 | sqr_add_c2(a, 2, 0, c3, c1, c2); | 120 | bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0); |
| 122 | r[2] = c3; | 121 | bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &c0); |
| 123 | c3 = 0; | 122 | bn_mul2_mulw_addtw(a[4], a[0], c2, c1, c0, &c2, &c1, &r[4]); |
| 124 | sqr_add_c2(a, 3, 0, c1, c2, c3); | 123 | |
| 125 | sqr_add_c2(a, 2, 1, c1, c2, c3); | 124 | bn_mul2_mulw_addtw(a[5], a[0], 0, c2, c1, &c2, &c1, &c0); |
| 126 | r[3] = c1; | 125 | bn_mul2_mulw_addtw(a[4], a[1], c2, c1, c0, &c2, &c1, &c0); |
| 127 | c1 = 0; | 126 | bn_mul2_mulw_addtw(a[3], a[2], c2, c1, c0, &c2, &c1, &r[5]); |
| 128 | sqr_add_c(a, 2, c2, c3, c1); | 127 | |
| 129 | sqr_add_c2(a, 3, 1, c2, c3, c1); | 128 | bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &c1, &c0); |
| 130 | sqr_add_c2(a, 4, 0, c2, c3, c1); | 129 | bn_mul2_mulw_addtw(a[4], a[2], c2, c1, c0, &c2, &c1, &c0); |
| 131 | r[4] = c2; | 130 | bn_mul2_mulw_addtw(a[5], a[1], c2, c1, c0, &c2, &c1, &c0); |
| 132 | c2 = 0; | 131 | bn_mul2_mulw_addtw(a[6], a[0], c2, c1, c0, &c2, &c1, &r[6]); |
| 133 | sqr_add_c2(a, 5, 0, c3, c1, c2); | 132 | |
| 134 | sqr_add_c2(a, 4, 1, c3, c1, c2); | 133 | bn_mul2_mulw_addtw(a[7], a[0], 0, c2, c1, &c2, &c1, &c0); |
| 135 | sqr_add_c2(a, 3, 2, c3, c1, c2); | 134 | bn_mul2_mulw_addtw(a[6], a[1], c2, c1, c0, &c2, &c1, &c0); |
| 136 | r[5] = c3; | 135 | bn_mul2_mulw_addtw(a[5], a[2], c2, c1, c0, &c2, &c1, &c0); |
| 137 | c3 = 0; | 136 | bn_mul2_mulw_addtw(a[4], a[3], c2, c1, c0, &c2, &c1, &r[7]); |
| 138 | sqr_add_c(a, 3, c1, c2, c3); | 137 | |
| 139 | sqr_add_c2(a, 4, 2, c1, c2, c3); | 138 | bn_mulw_addtw(a[4], a[4], 0, c2, c1, &c2, &c1, &c0); |
| 140 | sqr_add_c2(a, 5, 1, c1, c2, c3); | 139 | bn_mul2_mulw_addtw(a[5], a[3], c2, c1, c0, &c2, &c1, &c0); |
| 141 | sqr_add_c2(a, 6, 0, c1, c2, c3); | 140 | bn_mul2_mulw_addtw(a[6], a[2], c2, c1, c0, &c2, &c1, &c0); |
| 142 | r[6] = c1; | 141 | bn_mul2_mulw_addtw(a[7], a[1], c2, c1, c0, &c2, &c1, &r[8]); |
| 143 | c1 = 0; | 142 | |
| 144 | sqr_add_c2(a, 7, 0, c2, c3, c1); | 143 | bn_mul2_mulw_addtw(a[7], a[2], 0, c2, c1, &c2, &c1, &c0); |
| 145 | sqr_add_c2(a, 6, 1, c2, c3, c1); | 144 | bn_mul2_mulw_addtw(a[6], a[3], c2, c1, c0, &c2, &c1, &c0); |
| 146 | sqr_add_c2(a, 5, 2, c2, c3, c1); | 145 | bn_mul2_mulw_addtw(a[5], a[4], c2, c1, c0, &c2, &c1, &r[9]); |
| 147 | sqr_add_c2(a, 4, 3, c2, c3, c1); | 146 | |
| 148 | r[7] = c2; | 147 | bn_mulw_addtw(a[5], a[5], 0, c2, c1, &c2, &c1, &c0); |
| 149 | c2 = 0; | 148 | bn_mul2_mulw_addtw(a[6], a[4], c2, c1, c0, &c2, &c1, &c0); |
| 150 | sqr_add_c(a, 4, c3, c1, c2); | 149 | bn_mul2_mulw_addtw(a[7], a[3], c2, c1, c0, &c2, &c1, &r[10]); |
| 151 | sqr_add_c2(a, 5, 3, c3, c1, c2); | 150 | |
| 152 | sqr_add_c2(a, 6, 2, c3, c1, c2); | 151 | bn_mul2_mulw_addtw(a[7], a[4], 0, c2, c1, &c2, &c1, &c0); |
| 153 | sqr_add_c2(a, 7, 1, c3, c1, c2); | 152 | bn_mul2_mulw_addtw(a[6], a[5], c2, c1, c0, &c2, &c1, &r[11]); |
| 154 | r[8] = c3; | 153 | |
| 155 | c3 = 0; | 154 | bn_mulw_addtw(a[6], a[6], 0, c2, c1, &c2, &c1, &c0); |
| 156 | sqr_add_c2(a, 7, 2, c1, c2, c3); | 155 | bn_mul2_mulw_addtw(a[7], a[5], c2, c1, c0, &c2, &c1, &r[12]); |
| 157 | sqr_add_c2(a, 6, 3, c1, c2, c3); | 156 | |
| 158 | sqr_add_c2(a, 5, 4, c1, c2, c3); | 157 | bn_mul2_mulw_addtw(a[7], a[6], 0, c2, c1, &c2, &c1, &r[13]); |
| 159 | r[9] = c1; | 158 | |
| 160 | c1 = 0; | 159 | bn_mulw_addtw(a[7], a[7], 0, c2, c1, &c2, &r[15], &r[14]); |
| 161 | sqr_add_c(a, 5, c2, c3, c1); | ||
| 162 | sqr_add_c2(a, 6, 4, c2, c3, c1); | ||
| 163 | sqr_add_c2(a, 7, 3, c2, c3, c1); | ||
| 164 | r[10] = c2; | ||
| 165 | c2 = 0; | ||
| 166 | sqr_add_c2(a, 7, 4, c3, c1, c2); | ||
| 167 | sqr_add_c2(a, 6, 5, c3, c1, c2); | ||
| 168 | r[11] = c3; | ||
| 169 | c3 = 0; | ||
| 170 | sqr_add_c(a, 6, c1, c2, c3); | ||
| 171 | sqr_add_c2(a, 7, 5, c1, c2, c3); | ||
| 172 | r[12] = c1; | ||
| 173 | c1 = 0; | ||
| 174 | sqr_add_c2(a, 7, 6, c2, c3, c1); | ||
| 175 | r[13] = c2; | ||
| 176 | c2 = 0; | ||
| 177 | sqr_add_c(a, 7, c3, c1, c2); | ||
| 178 | r[14] = c3; | ||
| 179 | r[15] = c1; | ||
| 180 | } | 160 | } |
| 181 | #endif | 161 | #endif |
| 182 | 162 | ||
