diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-10-06 17:17:34 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-10-06 17:17:34 +0200 |
commit | 911344a99889319a7dba8a725a64dc324597f9eb (patch) | |
tree | 7273dd7914feb9b87a1b9e692a0dc432bb31ae96 | |
parent | 22fd8fd3f4c271d6037753165131c7c35a039762 (diff) | |
download | busybox-w32-911344a99889319a7dba8a725a64dc324597f9eb.tar.gz busybox-w32-911344a99889319a7dba8a725a64dc324597f9eb.tar.bz2 busybox-w32-911344a99889319a7dba8a725a64dc324597f9eb.zip |
tls: P256: x86-64 assembly
function old new delta
sp_256_mont_mul_8 127 155 +28
sp_256_proj_point_dbl_8 448 469 +21
sp_256_mont_sub_8 23 35 +12
sp_256_mont_dbl_8 26 38 +12
sp_256_sub_8 44 49 +5
sp_256_ecc_mulmod_8 1530 1535 +5
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 6/0 up/down: 83/0) Total: 83 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | networking/tls_sp_c32.c | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 532047739..14a7c7066 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c | |||
@@ -195,6 +195,34 @@ static int sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
195 | : "memory" | 195 | : "memory" |
196 | ); | 196 | ); |
197 | return reg; | 197 | return reg; |
198 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) | ||
199 | /* x86_64 has no alignment restrictions, and is little-endian, | ||
200 | * so 64-bit and 32-bit representations are identical */ | ||
201 | uint64_t reg; | ||
202 | asm volatile ( | ||
203 | "\n movq (%0), %3" | ||
204 | "\n addq (%1), %3" | ||
205 | "\n movq %3, (%2)" | ||
206 | "\n" | ||
207 | "\n movq 1*8(%0), %3" | ||
208 | "\n adcq 1*8(%1), %3" | ||
209 | "\n movq %3, 1*8(%2)" | ||
210 | "\n" | ||
211 | "\n movq 2*8(%0), %3" | ||
212 | "\n adcq 2*8(%1), %3" | ||
213 | "\n movq %3, 2*8(%2)" | ||
214 | "\n" | ||
215 | "\n movq 3*8(%0), %3" | ||
216 | "\n adcq 3*8(%1), %3" | ||
217 | "\n movq %3, 3*8(%2)" | ||
218 | "\n" | ||
219 | "\n sbbq %3, %3" | ||
220 | "\n" | ||
221 | : "=r" (a), "=r" (b), "=r" (r), "=r" (reg) | ||
222 | : "0" (a), "1" (b), "2" (r) | ||
223 | : "memory" | ||
224 | ); | ||
225 | return reg; | ||
198 | #else | 226 | #else |
199 | int i; | 227 | int i; |
200 | sp_digit carry; | 228 | sp_digit carry; |
@@ -265,6 +293,34 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
265 | : "memory" | 293 | : "memory" |
266 | ); | 294 | ); |
267 | return reg; | 295 | return reg; |
296 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) | ||
297 | /* x86_64 has no alignment restrictions, and is little-endian, | ||
298 | * so 64-bit and 32-bit representations are identical */ | ||
299 | uint64_t reg; | ||
300 | asm volatile ( | ||
301 | "\n movq (%0), %3" | ||
302 | "\n subq (%1), %3" | ||
303 | "\n movq %3, (%2)" | ||
304 | "\n" | ||
305 | "\n movq 1*8(%0), %3" | ||
306 | "\n sbbq 1*8(%1), %3" | ||
307 | "\n movq %3, 1*8(%2)" | ||
308 | "\n" | ||
309 | "\n movq 2*8(%0), %3" | ||
310 | "\n sbbq 2*8(%1), %3" | ||
311 | "\n movq %3, 2*8(%2)" | ||
312 | "\n" | ||
313 | "\n movq 3*8(%0), %3" | ||
314 | "\n sbbq 3*8(%1), %3" | ||
315 | "\n movq %3, 3*8(%2)" | ||
316 | "\n" | ||
317 | "\n sbbq %3, %3" | ||
318 | "\n" | ||
319 | : "=r" (a), "=r" (b), "=r" (r), "=r" (reg) | ||
320 | : "0" (a), "1" (b), "2" (r) | ||
321 | : "memory" | ||
322 | ); | ||
323 | return reg; | ||
268 | #else | 324 | #else |
269 | int i; | 325 | int i; |
270 | sp_digit borrow; | 326 | sp_digit borrow; |
@@ -380,6 +436,49 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
380 | } | 436 | } |
381 | r[15] = accl; | 437 | r[15] = accl; |
382 | memcpy(r, rr, sizeof(rr)); | 438 | memcpy(r, rr, sizeof(rr)); |
439 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) | ||
440 | /* x86_64 has no alignment restrictions, and is little-endian, | ||
441 | * so 64-bit and 32-bit representations are identical */ | ||
442 | const uint64_t* aa = (const void*)a; | ||
443 | const uint64_t* bb = (const void*)b; | ||
444 | uint64_t rr[8]; | ||
445 | int k; | ||
446 | uint64_t accl; | ||
447 | uint64_t acch; | ||
448 | |||
449 | acch = accl = 0; | ||
450 | for (k = 0; k < 7; k++) { | ||
451 | int i, j; | ||
452 | uint64_t acc_hi; | ||
453 | i = k - 3; | ||
454 | if (i < 0) | ||
455 | i = 0; | ||
456 | j = k - i; | ||
457 | acc_hi = 0; | ||
458 | do { | ||
459 | //////////////////////// | ||
460 | // uint128_t m = ((uint128_t)aa[i]) * bb[j]; | ||
461 | // acc_hi:acch:accl += m; | ||
462 | asm volatile ( | ||
463 | // aa[i] is already loaded in %%rax | ||
464 | "\n mulq %7" | ||
465 | "\n addq %%rax, %0" | ||
466 | "\n adcq %%rdx, %1" | ||
467 | "\n adcq $0, %2" | ||
468 | : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi) | ||
469 | : "0" (accl), "1" (acch), "2" (acc_hi), "a" (aa[i]), "m" (bb[j]) | ||
470 | : "cc", "dx" | ||
471 | ); | ||
472 | //////////////////////// | ||
473 | j--; | ||
474 | i++; | ||
475 | } while (i != 4 && i <= k); | ||
476 | rr[k] = accl; | ||
477 | accl = acch; | ||
478 | acch = acc_hi; | ||
479 | } | ||
480 | rr[7] = accl; | ||
481 | memcpy(r, rr, sizeof(rr)); | ||
383 | #elif 0 | 482 | #elif 0 |
384 | //TODO: arm assembly (untested) | 483 | //TODO: arm assembly (untested) |
385 | sp_digit tmp[16]; | 484 | sp_digit tmp[16]; |