From 28d52ec2924676a240d0477f564160bd054d5549 Mon Sep 17 00:00:00 2001 From: jsing <> Date: Mon, 11 Aug 2025 14:11:20 +0000 Subject: Resync s2n-bignum primitives for amd64 with upstream. This amounts to whitespace changes and label renaming. --- src/lib/libcrypto/bn/arch/amd64/bignum_add.S | 49 +++++++++++----------- src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S | 30 ++++++------- src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S | 26 ++++++------ src/lib/libcrypto/bn/arch/amd64/bignum_mul.S | 23 +++++----- .../libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S | 6 +-- .../libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S | 7 ++-- src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S | 27 ++++++------ .../libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S | 6 +-- .../libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S | 5 ++- src/lib/libcrypto/bn/arch/amd64/bignum_sub.S | 45 ++++++++++---------- src/lib/libcrypto/bn/arch/amd64/word_clz.S | 4 +- 11 files changed, 113 insertions(+), 115 deletions(-) (limited to 'src/lib') diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S index 5fe4aae7a1..5ec0e36282 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S @@ -16,9 +16,8 @@ // Add, z := x + y // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] // -// extern uint64_t bignum_add -// (uint64_t p, uint64_t *z, -// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); +// extern uint64_t bignum_add(uint64_t p, uint64_t *z, uint64_t m, +// const uint64_t *x, uint64_t n, const uint64_t *y); // // Does the z := x + y operation, truncating modulo p words in general and // returning a top carry (0 or 1) in the p'th place, only adding the input @@ -49,7 +48,7 @@ S2N_BN_SYMBOL(bignum_add): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi @@ -75,7 +74,7 @@ S2N_BN_SYMBOL(bignum_add): cmp p, n cmovc n, p cmp m, n - jc ylonger + jc bignum_add_ylonger // The case where x is longer or of the same size (p >= m >= n) @@ -83,27 +82,27 @@ S2N_BN_SYMBOL(bignum_add): sub m, n inc m test n, n - jz xtest -xmainloop: + jz bignum_add_xtest +bignum_add_xmainloop: mov a, [x+8*i] adc a, [y+8*i] mov [z+8*i],a inc i dec n - jnz xmainloop - jmp xtest -xtoploop: + jnz bignum_add_xmainloop + jmp bignum_add_xtest +bignum_add_xtoploop: mov a, [x+8*i] adc a, 0 mov [z+8*i],a inc i -xtest: +bignum_add_xtest: dec m - jnz xtoploop + jnz bignum_add_xtoploop mov ashort, 0 adc a, 0 test p, p - jnz tails + jnz bignum_add_tails #if WINDOWS_ABI pop rsi pop rdi @@ -112,30 +111,30 @@ xtest: // The case where y is longer (p >= n > m) -ylonger: +bignum_add_ylonger: sub p, n sub n, m test m, m - jz ytoploop -ymainloop: + jz bignum_add_ytoploop +bignum_add_ymainloop: mov a, [x+8*i] adc a, [y+8*i] mov [z+8*i],a inc i dec m - jnz ymainloop -ytoploop: + jnz bignum_add_ymainloop +bignum_add_ytoploop: mov a, [y+8*i] adc a, 0 mov [z+8*i],a inc i dec n - jnz ytoploop + jnz bignum_add_ytoploop mov ashort, 0 adc a, 0 test p, p - jnz tails + jnz bignum_add_tails #if WINDOWS_ABI pop rsi pop rdi @@ -144,16 +143,16 @@ ytoploop: // Adding a non-trivial tail, when p > max(m,n) -tails: +bignum_add_tails: mov [z+8*i],a xor a, a - jmp tail -tailloop: + jmp bignum_add_tail +bignum_add_tailloop: mov [z+8*i],a -tail: +bignum_add_tail: inc i dec p - jnz tailloop + jnz bignum_add_tailloop #if WINDOWS_ABI pop rsi pop rdi diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S index 25ba17bce2..ebbacec344 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S @@ -16,8 +16,8 @@ // Multiply-add with single-word multiplier, z := z + c * y // Inputs c, y[n]; outputs function return (carry-out) and z[k] // -// extern uint64_t bignum_cmadd -// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); +// extern uint64_t bignum_cmadd(uint64_t k, uint64_t *z, uint64_t c, uint64_t n, +// const uint64_t *y); // // Does the "z := z + c * y" operation where y is n digits, result z is p. // Truncates the result in general. @@ -54,7 +54,7 @@ S2N_BN_SYMBOL(bignum_cmadd): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi @@ -82,7 +82,7 @@ S2N_BN_SYMBOL(bignum_cmadd): xor h, h test n, n - jz end + jz bignum_cmadd_end // Move c into a safer register as multiplies overwrite rdx @@ -96,11 +96,11 @@ S2N_BN_SYMBOL(bignum_cmadd): mov h, rdx mov ishort, 1 dec n - jz hightail + jz bignum_cmadd_hightail // Main loop, where we always have CF + previous high part h to add in -loop: +bignum_cmadd_loop: adc h, [z+8*i] sbb r, r mov rax, [x+8*i] @@ -111,36 +111,36 @@ loop: mov h, rdx inc i dec n - jnz loop + jnz bignum_cmadd_loop -hightail: +bignum_cmadd_hightail: adc h, 0 // Propagate the carry all the way to the end with h as extra carry word -tail: +bignum_cmadd_tail: test p, p - jz end + jz bignum_cmadd_end add [z+8*i], h mov hshort, 0 inc i dec p - jz highend + jz bignum_cmadd_highend -tloop: +bignum_cmadd_tloop: adc [z+8*i], h inc i dec p - jnz tloop + jnz bignum_cmadd_tloop -highend: +bignum_cmadd_highend: adc h, 0 // Return the high/carry word -end: +bignum_cmadd_end: mov rax, h pop rbx diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S index 12f785d63a..3e28e37535 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S @@ -16,8 +16,8 @@ // Multiply by a single word, z := c * y // Inputs c, y[n]; outputs function return (carry-out) and z[k] // -// extern uint64_t bignum_cmul -// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); +// extern uint64_t bignum_cmul(uint64_t k, uint64_t *z, uint64_t c, uint64_t n, +// const uint64_t *y); // // Does the "z := c * y" operation where y is n digits, result z is p. // Truncates the result in general unless p >= n + 1. @@ -51,7 +51,7 @@ S2N_BN_SYMBOL(bignum_cmul): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi @@ -76,7 +76,7 @@ S2N_BN_SYMBOL(bignum_cmul): xor h, h xor i, i test n, n - jz tail + jz bignum_cmul_tail // Move c into a safer register as multiplies overwrite rdx @@ -90,11 +90,11 @@ S2N_BN_SYMBOL(bignum_cmul): mov h, rdx inc i cmp i, n - jz tail + jz bignum_cmul_tail // Main loop doing the multiplications -loop: +bignum_cmul_loop: mov rax, [x+8*i] mul c add rax, h @@ -103,28 +103,28 @@ loop: mov h, rdx inc i cmp i, n - jc loop + jc bignum_cmul_loop // Add a tail when the destination is longer -tail: +bignum_cmul_tail: cmp i, p - jnc end + jnc bignum_cmul_end mov [z+8*i], h xor h, h inc i cmp i, p - jnc end + jnc bignum_cmul_end -tloop: +bignum_cmul_tloop: mov [z+8*i], h inc i cmp i, p - jc tloop + jc bignum_cmul_tloop // Return the high/carry word -end: +bignum_cmul_end: mov rax, h #if WINDOWS_ABI diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S index a3552679a2..3bc09de30a 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S @@ -16,9 +16,8 @@ // Multiply z := x * y // Inputs x[m], y[n]; output z[k] // -// extern void bignum_mul -// (uint64_t k, uint64_t *z, -// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); +// extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, +// uint64_t n, const uint64_t *y); // // Does the "z := x * y" operation where x is m digits, y is n, result z is k. // Truncates the result in general unless k >= m + n @@ -59,7 +58,7 @@ S2N_BN_SYMBOL(bignum_mul): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi @@ -88,7 +87,7 @@ S2N_BN_SYMBOL(bignum_mul): // If we did a multiply-add variant, however, then we could test p, p - jz end + jz bignum_mul_end // Set initial 2-part sum to zero (we zero c inside the body) @@ -99,7 +98,7 @@ S2N_BN_SYMBOL(bignum_mul): xor k, k -outerloop: +bignum_mul_outerloop: // Zero our carry term first; we eventually want it and a zero is useful now // Set a = max 0 (k + 1 - n), i = min (k + 1) m @@ -125,11 +124,11 @@ outerloop: mov d, k sub d, i sub i, a - jbe innerend + jbe bignum_mul_innerend lea x,[rcx+8*a] lea y,[r9+8*d-8] -innerloop: +bignum_mul_innerloop: mov rax, [y+8*i] mul QWORD PTR [x] add x, 8 @@ -137,9 +136,9 @@ innerloop: adc h, rdx adc c, 0 dec i - jnz innerloop + jnz bignum_mul_innerloop -innerend: +bignum_mul_innerend: mov [z], l mov l, h @@ -147,9 +146,9 @@ innerend: add z, 8 cmp k, p - jc outerloop + jc bignum_mul_outerloop -end: +bignum_mul_end: pop r15 pop r14 pop r13 diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S index 70ff69e372..5e04bcc009 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S @@ -16,8 +16,8 @@ // Multiply z := x * y // Inputs x[4], y[4]; output z[8] // -// extern void bignum_mul_4_8_alt -// (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); +// extern void bignum_mul_4_8_alt(uint64_t z[static 8], const uint64_t x[static 4], +// const uint64_t y[static 4]); // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y @@ -72,7 +72,7 @@ adc h, rdx S2N_BN_SYMBOL(bignum_mul_4_8_alt): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S index 066403b074..4d54168c90 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S @@ -16,8 +16,9 @@ // Multiply z := x * y // Inputs x[8], y[8]; output z[16] // -// extern void bignum_mul_8_16_alt -// (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); +// extern void bignum_mul_8_16_alt(uint64_t z[static 16], +// const uint64_t x[static 8], +// const uint64_t y[static 8]); // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y @@ -72,7 +73,7 @@ adc h, rdx S2N_BN_SYMBOL(bignum_mul_8_16_alt): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S index 54e3f59442..48cc182b72 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S @@ -16,8 +16,7 @@ // Square z := x^2 // Input x[n]; output z[k] // -// extern void bignum_sqr -// (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); +// extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x); // // Does the "z := x^2" operation where x is n digits and result z is k. // Truncates the result in general unless k >= 2 * n @@ -62,7 +61,7 @@ #define llshort ebp S2N_BN_SYMBOL(bignum_sqr): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi @@ -86,7 +85,7 @@ S2N_BN_SYMBOL(bignum_sqr): // If p = 0 the result is trivial and nothing needs doing test p, p - jz end + jz bignum_sqr_end // initialize (hh,ll) = 0 @@ -97,7 +96,7 @@ S2N_BN_SYMBOL(bignum_sqr): xor k, k -outerloop: +bignum_sqr_outerloop: // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n // We want to accumulate all x[i] * x[k - i] for bot <= i < top @@ -122,7 +121,7 @@ outerloop: // If htop <= bot then main doubled part of the sum is empty cmp i, htop - jnc nosumming + jnc bignum_sqr_nosumming // Use a moving pointer for [y] = x[k-i] for the cofactor @@ -132,7 +131,7 @@ outerloop: // Do the main part of the sum x[i] * x[k - i] for 2 * i < k -innerloop: +bignum_sqr_innerloop: mov a, [x+8*i] mul QWORD PTR [y] add l, a @@ -141,7 +140,7 @@ innerloop: sub y, 8 inc i cmp i, htop - jc innerloop + jc bignum_sqr_innerloop // Now double it @@ -151,11 +150,11 @@ innerloop: // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term -nosumming: +bignum_sqr_nosumming: test k, 1 - jnz innerend + jnz bignum_sqr_innerend cmp i, n - jnc innerend + jnc bignum_sqr_innerend mov a, [x+8*i] mul a @@ -165,7 +164,7 @@ nosumming: // Now add the local sum into the global sum, store and shift -innerend: +bignum_sqr_innerend: add l, ll mov [z+8*k], l adc h, hh @@ -175,11 +174,11 @@ innerend: inc k cmp k, p - jc outerloop + jc bignum_sqr_outerloop // Restore registers and return -end: +bignum_sqr_end: pop r15 pop r14 pop r13 diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S index 7c534ae907..cb0eec0eea 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S @@ -16,8 +16,8 @@ // Square, z := x^2 // Input x[4]; output z[8] // -// extern void bignum_sqr_4_8_alt -// (uint64_t z[static 8], uint64_t x[static 4]); +// extern void bignum_sqr_4_8_alt(uint64_t z[static 8], +// const uint64_t x[static 4]); // // Standard x86-64 ABI: RDI = z, RSI = x // Microsoft x64 ABI: RCX = z, RDX = x @@ -71,7 +71,7 @@ adc c, 0 S2N_BN_SYMBOL(bignum_sqr_4_8_alt): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S index ac0b6f96c2..04577d56cf 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S @@ -16,7 +16,8 @@ // Square, z := x^2 // Input x[8]; output z[16] // -// extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); +// extern void bignum_sqr_8_16_alt(uint64_t z[static 16], +// const uint64_t x[static 8]); // // Standard x86-64 ABI: RDI = z, RSI = x // Microsoft x64 ABI: RCX = z, RDX = x @@ -103,7 +104,7 @@ adc c, 0 S2N_BN_SYMBOL(bignum_sqr_8_16_alt): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S index 3ff8a30510..a18e86ba7c 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S @@ -16,9 +16,8 @@ // Subtract, z := x - y // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] // -// extern uint64_t bignum_sub -// (uint64_t p, uint64_t *z, -// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); +// extern uint64_t bignum_sub(uint64_t p, uint64_t *z, uint64_t m, +// const uint64_t *x, uint64_t n, const uint64_t *y); // // Does the z := x - y operation, truncating modulo p words in general and // returning a top borrow (0 or 1) in the p'th place, only subtracting input @@ -49,7 +48,7 @@ S2N_BN_SYMBOL(bignum_sub): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi @@ -75,7 +74,7 @@ S2N_BN_SYMBOL(bignum_sub): cmp p, n cmovc n, p cmp m, n - jc ylonger + jc bignum_sub_ylonger // The case where x is longer or of the same size (p >= m >= n) @@ -83,32 +82,32 @@ S2N_BN_SYMBOL(bignum_sub): sub m, n inc m test n, n - jz xtest -xmainloop: + jz bignum_sub_xtest +bignum_sub_xmainloop: mov a, [x+8*i] sbb a, [y+8*i] mov [z+8*i],a inc i dec n - jnz xmainloop - jmp xtest -xtoploop: + jnz bignum_sub_xmainloop + jmp bignum_sub_xtest +bignum_sub_xtoploop: mov a, [x+8*i] sbb a, 0 mov [z+8*i],a inc i -xtest: +bignum_sub_xtest: dec m - jnz xtoploop + jnz bignum_sub_xtoploop sbb a, a test p, p - jz tailskip -tailloop: + jz bignum_sub_tailskip +bignum_sub_tailloop: mov [z+8*i],a inc i dec p - jnz tailloop -tailskip: + jnz bignum_sub_tailloop +bignum_sub_tailskip: neg a #if WINDOWS_ABI pop rsi @@ -118,29 +117,29 @@ tailskip: // The case where y is longer (p >= n > m) -ylonger: +bignum_sub_ylonger: sub p, n sub n, m test m, m - jz ytoploop -ymainloop: + jz bignum_sub_ytoploop +bignum_sub_ymainloop: mov a, [x+8*i] sbb a, [y+8*i] mov [z+8*i],a inc i dec m - jnz ymainloop -ytoploop: + jnz bignum_sub_ymainloop +bignum_sub_ytoploop: mov ashort, 0 sbb a, [y+8*i] mov [z+8*i],a inc i dec n - jnz ytoploop + jnz bignum_sub_ytoploop sbb a, a test p, p - jnz tailloop + jnz bignum_sub_tailloop neg a #if WINDOWS_ABI pop rsi diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S index 3926fcd4b0..84c9c8275d 100644 --- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S +++ b/src/lib/libcrypto/bn/arch/amd64/word_clz.S @@ -16,7 +16,7 @@ // Count leading zero bits in a single word // Input a; output function return // -// extern uint64_t word_clz (uint64_t a); +// extern uint64_t word_clz(uint64_t a); // // Standard x86-64 ABI: RDI = a, returns RAX // Microsoft x64 ABI: RCX = a, returns RAX @@ -30,7 +30,7 @@ .text S2N_BN_SYMBOL(word_clz): - _CET_ENDBR + _CET_ENDBR #if WINDOWS_ABI push rdi -- cgit v1.2.3-55-g6feb