13 files changed, 0 insertions, 2063 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
deleted file mode 100644
index 5fe4aae7a1..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S
+++ /dev/null
@@ -1,165 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Add, z := x + y
-// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
-//
-//    extern uint64_t bignum_add
-//     (uint64_t p, uint64_t *z,
-//      uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
-//
-// Does the z := x + y operation, truncating modulo p words in general and
-// returning a top carry (0 or 1) in the p'th place, only adding the input
-// words below p (as well as m and n respectively) to get the sum and carry.
-//
-// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
-// Microsoft x64 ABI:   RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add)
-        .text
-#define p rdi
-#define z rsi
-#define m rdx
-#define x rcx
-#define n r8
-#define y r9
-#define i r10
-#define a rax
-#define ashort eax
-S2N_BN_SYMBOL(bignum_add):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-        mov     rsi, rdx
-        mov     rdx, r8
-        mov     rcx, r9
-        mov     r8, [rsp+56]            // n: [rsp+40] at entry, +16 for the two pushes
-        mov     r9, [rsp+64]            // y: [rsp+48] at entry, +16 for the two pushes
-#endif
-// Zero the main index counter for both branches
-        xor     i, i
-// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
-// we'll never need words past the p'th. Can now assume m <= p and n <= p.
-// Then compare the modified m and n and branch accordingly
-        cmp     p, m                    // CF <=> p < m
-        cmovc   m, p                    // m = min(p,m)
-        cmp     p, n                    // CF <=> p < n
-        cmovc   n, p                    // n = min(p,n)
-        cmp     m, n                    // CF <=> m < n
-        jc      ylonger
-// The case where x is longer or of the same size (p >= m >= n)
-        sub     p, m                    // p = words of z above the top of x
-        sub     m, n                    // m = words of x with no matching y word
-        inc     m                       // +1 because xtest decrements before testing
-        test    n, n                    // CF is 0 here, ready for the first adc
-        jz      xtest
-xmainloop:
-        mov     a, [x+8*i]
-        adc     a, [y+8*i]              // a = x[i] + y[i] + CF
-        mov     [z+8*i],a
-        inc     i                       // inc and dec leave CF untouched
-        dec     n
-        jnz     xmainloop
-        jmp     xtest
-xtoploop:
-        mov     a, [x+8*i]
-        adc     a, 0                    // carry ripples through the rest of x
-        mov     [z+8*i],a
-        inc     i
-xtest:
-        dec     m
-        jnz     xtoploop
-        mov     ashort, 0               // mov rather than xor, so CF is preserved
-        adc     a, 0                    // a = carry out of the words written so far
-        test    p, p                    // any words of z still unwritten?
-        jnz     tails
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-// The case where y is longer (p >= n > m)
-ylonger:
-        sub     p, n                    // p = words of z above the top of y
-        sub     n, m                    // n = words of y with no matching x word (>= 1)
-        test    m, m                    // CF is 0 here, ready for the first adc
-        jz      ytoploop
-ymainloop:
-        mov     a, [x+8*i]
-        adc     a, [y+8*i]              // a = x[i] + y[i] + CF
-        mov     [z+8*i],a
-        inc     i                       // inc and dec leave CF untouched
-        dec     m
-        jnz     ymainloop
-ytoploop:
-        mov     a, [y+8*i]
-        adc     a, 0                    // carry ripples through the rest of y
-        mov     [z+8*i],a
-        inc     i
-        dec     n
-        jnz     ytoploop
-        mov     ashort, 0               // mov rather than xor, so CF is preserved
-        adc     a, 0                    // a = carry out of the words written so far
-        test    p, p                    // any words of z still unwritten?
-        jnz     tails
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-// Adding a non-trivial tail, when p > max(m,n)
-tails:
-        mov     [z+8*i],a               // the carry becomes the next word of z
-        xor     a, a                    // remaining words are zero, and so is the return value
-        jmp     tail
-tailloop:
-        mov     [z+8*i],a
-tail:
-        inc     i
-        dec     p
-        jnz     tailloop
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
deleted file mode 100644
index 25ba17bce2..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Multiply-add with single-word multiplier, z := z + c * y
-// Inputs c, y[n]; outputs function return (carry-out) and z[k]
-//
-//    extern uint64_t bignum_cmadd
-//     (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
-//
-// Does the "z := z + c * y" operation where y is n digits, result z is k.
-// Truncates the result in general.
-//
-// The return value is a high/carry word that is meaningful when k = n + 1, or
-// more generally when n <= k and the result fits in k + 1 digits. In these
-// cases it gives the top digit of the (k + 1)-digit result.
-//
-// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
-// Microsoft x64 ABI:   RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmadd)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmadd)
-        .text
-#define p rdi
-#define z rsi
-#define c r9
-#define n rcx
-#define x r8
-#define i r10
-#define h r11
-#define r rbx
-#define hshort r11d
-#define ishort r10d
-S2N_BN_SYMBOL(bignum_cmadd):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-        mov     rsi, rdx
-        mov     rdx, r8
-        mov     rcx, r9
-        mov     r8, [rsp+56]            // 5th argument: [rsp+40] at entry, +16 for the two pushes
-#endif
-// Seems hard to avoid one more register: rbx (used as r) is callee-saved, so save it
-        push    rbx
-// First clamp the input size n := min(p,n) since we can never need to read
-// past the p'th term of the input to generate p-digit output.
-// Subtract p := p - min(n,p) so it holds the size of the extra tail needed
-        cmp     p, n                    // CF <=> p < n
-        cmovc   n, p                    // n = min(p,n)
-        sub     p, n                    // p = number of tail words above the input
-// Initialize high part h = 0; if n = 0 do nothing but return that zero
-        xor     h, h
-        test    n, n
-        jz      end
-// Move c into a safer register as multiplies overwrite rdx
-        mov     c, rdx
-// Initialization of the loop: 2^64 * CF + [h,z_0'] = z_0 + c * x_0
-        mov     rax, [x]
-        mul     c
-        add     [z], rax                // CF from this add feeds the loop or hightail
-        mov     h, rdx
-        mov     ishort, 1               // mov keeps CF; the 32-bit write zero-extends into i
-        dec     n                       // dec keeps CF
-        jz      hightail
-// Main loop, where we always have CF + previous high part h to add in
-loop:
-        adc     h, [z+8*i]              // h = h + z[i] + CF
-        sbb     r, r                    // r = -CF (0 or all ones)
-        mov     rax, [x+8*i]
-        mul     c
-        sub     rdx, r                  // add that carry into the new high word
-        add     rax, h                  // CF from here is consumed on the next pass
-        mov     [z+8*i], rax
-        mov     h, rdx
-        inc     i                       // inc and dec leave CF untouched
-        dec     n
-        jnz     loop
-hightail:
-        adc     h, 0                    // fold the pending carry into h
-// Propagate the carry all the way to the end with h as extra carry word
-tail:
-        test    p, p
-        jz      end
-        add     [z+8*i], h
-        mov     hshort, 0               // zero h without disturbing CF
-        inc     i
-        dec     p
-        jz      highend
-tloop:
-        adc     [z+8*i], h              // h = 0 here, so this only propagates CF
-        inc     i
-        dec     p
-        jnz     tloop
-highend:
-        adc     h, 0                    // h = carry out of the top word
-// Return the high/carry word
-end:
-        mov     rax, h
-        pop     rbx
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
deleted file mode 100644
index 12f785d63a..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Multiply by a single word, z := c * y
-// Inputs c, y[n]; outputs function return (carry-out) and z[k]
-//
-//    extern uint64_t bignum_cmul
-//     (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
-//
-// Does the "z := c * y" operation where y is n digits, result z is k.
-// Truncates the result in general unless k >= n + 1.
-//
-// The return value is a high/carry word that is meaningful when k >= n as
-// giving the high part of the result. Since this is always zero if k > n,
-// it is mainly of interest in the special case k = n, i.e. where the source
-// and destination have the same nominal size, when it gives the extra word
-// of the full result.
-//
-// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
-// Microsoft x64 ABI:   RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul)
-        .text
-#define p rdi
-#define z rsi
-#define c r9
-#define n rcx
-#define x r8
-#define i r10
-#define h r11
-S2N_BN_SYMBOL(bignum_cmul):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-        mov     rsi, rdx
-        mov     rdx, r8
-        mov     rcx, r9
-        mov     r8, [rsp+56]            // 5th argument: [rsp+40] at entry, +16 for the two pushes
-#endif
-// First clamp the input size n := min(p,n) since we can never need to read
-// past the p'th term of the input to generate p-digit output. Now we can
-// assume that n <= p
-        cmp     p, n                    // CF <=> p < n
-        cmovc   n, p                    // n = min(p,n)
-// Initialize current input/output pointer offset i and high part h.
-// But then if n = 0 skip the multiplication and go to the tail part
-        xor     h, h
-        xor     i, i
-        test    n, n
-        jz      tail
-// Move c into a safer register as multiplies overwrite rdx
-        mov     c, rdx
-// Initialization of the loop: [h,l] = c * x_0
-        mov     rax, [x]
-        mul     c
-        mov     [z], rax
-        mov     h, rdx
-        inc     i
-        cmp     i, n
-        jz      tail                    // single-word input: go straight to the tail
-// Main loop doing the multiplications
-loop:
-        mov     rax, [x+8*i]
-        mul     c
-        add     rax, h                  // low product word + previous high word
-        adc     rdx, 0                  // carry goes into the new high word
-        mov     [z+8*i], rax
-        mov     h, rdx
-        inc     i
-        cmp     i, n
-        jc      loop                    // continue while i < n
-// Add a tail when the destination is longer
-tail:
-        cmp     i, p
-        jnc     end                     // i >= p: no room, h is returned as the high word
-        mov     [z+8*i], h              // the high word becomes the next word of z
-        xor     h, h                    // remaining words are zero, and so is the return value
-        inc     i
-        cmp     i, p
-        jnc     end
-tloop:
-        mov     [z+8*i], h
-        inc     i
-        cmp     i, p
-        jc      tloop                   // continue while i < p
-// Return the high/carry word
-end:
-        mov     rax, h
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
deleted file mode 100644
index a3552679a2..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S
+++ /dev/null
@@ -1,167 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Multiply z := x * y
-// Inputs x[m], y[n]; output z[k]
-//
-//    extern void bignum_mul
-//     (uint64_t k, uint64_t *z,
-//      uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
-//
-// Does the "z := x * y" operation where x is m digits, y is n, result z is k.
-// Truncates the result in general unless k >= m + n
-//
-// Standard x86-64 ABI: RDI = k, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y
-// Microsoft x64 ABI:   RCX = k, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul)
-        .text
-// These arguments are used in the registers they arrive in
-#define p rdi
-#define z rsi
-#define n r8
-// These are locals, or values taken from the other arguments (m from rdx, x from rcx, y from r9)
-#define c r15
-#define h r14
-#define l r13
-#define x r12
-#define y r11
-#define i rbx
-#define k r10
-#define m rbp
-// These are always local scratch since multiplier result is in these
-#define a rax
-#define d rdx
-S2N_BN_SYMBOL(bignum_mul):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-        mov     rsi, rdx
-        mov     rdx, r8
-        mov     rcx, r9
-        mov     r8, [rsp+56]            // n: [rsp+40] at entry, +16 for the two pushes
-        mov     r9, [rsp+64]            // y: [rsp+48] at entry, +16 for the two pushes
-#endif
-// We use too many registers, and also we need rax:rdx for multiplications
-        push    rbx
-        push    rbp
-        push    r12
-        push    r13
-        push    r14
-        push    r15
-        mov     m, rdx                  // move m out of rdx, which mul overwrites
-// If the result size is zero, do nothing
-// Note that even if either or both inputs has size zero, we can't
-// just give up because we at least need to zero the output array
-// If we did a multiply-add variant, however, then we could
-        test    p, p
-        jz      end
-// Set initial 2-part sum to zero (we zero c inside the body)
-        xor     h,h
-        xor     l,l
-// Otherwise do outer loop k = 0 ... k = p - 1
-        xor     k, k
-outerloop:
-// Zero our carry term first; we eventually want it and a zero is useful now
-// Set a =  max 0 (k + 1 - n), i = min (k + 1) m
-// This defines the range a <= j < i for the inner summation
-// Note that since k < p < 2^64 we can assume k + 1 doesn't overflow
-// And since we want to increment it anyway, we might as well do it now
-        xor     c, c            // c = 0
-        inc     k               // k = k + 1
-        mov     a, k            // a = k + 1
-        sub     a, n            // a = k + 1 - n
-        cmovc   a, c            // a = max 0 (k + 1 - n)
-        mov     i, m            // i = m
-        cmp     k, m            // CF <=> k + 1 < m
-        cmovc   i, k            // i = min (k + 1) m
-// Turn i into a loop count, and skip things if it's <= 0
-// Otherwise set up initial pointers x -> x0[a] and y -> y0[k - a]
-// and then launch into the main inner loop, postdecrementing i
-        mov     d, k            // d = k + 1
-        sub     d, i            // d = k + 1 - i
-        sub     i, a            // i = number of products in this column
-        jbe     innerend        // empty range: nothing to add
-        lea     x,[rcx+8*a]     // x -> x0[a]
-        lea     y,[r9+8*d-8]    // chosen so that [y+8*i] is y0[k - a] on entry
-innerloop:
-        mov     rax, [y+8*i]
-        mul     QWORD PTR  [x]
-        add     x, 8            // x walks up while [y+8*i] walks down
-        add     l, rax
-        adc     h, rdx
-        adc     c, 0            // (c,h,l) += product just computed
-        dec     i
-        jnz     innerloop
-innerend:
-        mov     [z], l          // store the finished result word
-        mov     l, h            // shift the 3-word window down by one word
-        mov     h, c
-        add     z, 8
-        cmp     k, p            // k was already incremented above
-        jc      outerloop       // continue while k < p
-end:
-        pop     r15
-        pop     r14
-        pop     r13
-        pop     r12
-        pop     rbp
-        pop     rbx
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
deleted file mode 100644
index 70ff69e372..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Multiply z := x * y
-// Inputs x[4], y[4]; output z[8]
-//
-//    extern void bignum_mul_4_8_alt
-//      (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]);
-//
-// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
-// Microsoft x64 ABI:   RCX = z, RDX = x, R8 = y
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_4_8_alt)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_4_8_alt)
-        .text
-// These arguments are used in the registers they arrive in
-#define z rdi
-#define x rsi
-// This is moved from rdx to free it for muls
-#define y rcx
-// Other variables used as a rotating 3-word window to add terms to
-#define t0 r8
-#define t1 r9
-#define t2 r10
-// Macro for the key "multiply and add to (c,h,l)" step
-#define combadd(c,h,l,numa,numb)                \
-        mov     rax, numa;                      \
-        mul     QWORD PTR numb;                 \
-        add     l, rax;                         \
-        adc     h, rdx;                         \
-        adc     c, 0
-// A minutely shorter form for when c = 0 initially (adc c, c then leaves just the carry)
-#define combadz(c,h,l,numa,numb)                \
-        mov     rax, numa;                      \
-        mul     QWORD PTR numb;                 \
-        add     l, rax;                         \
-        adc     h, rdx;                         \
-        adc     c, c
-// A short form where we don't expect a top carry, so no third word is updated
-#define combads(h,l,numa,numb)                  \
-        mov     rax, numa;                      \
-        mul     QWORD PTR numb;                 \
-        add     l, rax;                         \
-        adc     h, rdx
-S2N_BN_SYMBOL(bignum_mul_4_8_alt):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-        mov     rsi, rdx
-        mov     rdx, r8
-#endif
-// Copy y into a safe register to start with
-        mov     y, rdx
-// Result term 0: x[0] * y[0]
-        mov     rax, [x]
-        mul     QWORD PTR [y]
-        mov     [z], rax
-        mov     t0, rdx
-        xor     t1, t1
-// Result term 1: add in all x[i] * y[j] with i + j = 1
-        xor     t2, t2
-        combads(t1,t0,[x],[y+8])
-        combadz(t2,t1,t0,[x+8],[y])
-        mov     [z+8], t0
-// Result term 2: add in all x[i] * y[j] with i + j = 2
-        xor     t0, t0
-        combadz(t0,t2,t1,[x],[y+16])
-        combadd(t0,t2,t1,[x+8],[y+8])
-        combadd(t0,t2,t1,[x+16],[y])
-        mov     [z+16], t1
-// Result term 3: add in all x[i] * y[j] with i + j = 3
-        xor     t1, t1
-        combadz(t1,t0,t2,[x],[y+24])
-        combadd(t1,t0,t2,[x+8],[y+16])
-        combadd(t1,t0,t2,[x+16],[y+8])
-        combadd(t1,t0,t2,[x+24],[y])
-        mov     [z+24], t2
-// Result term 4: add in all x[i] * y[j] with i + j = 4
-        xor     t2, t2
-        combadz(t2,t1,t0,[x+8],[y+24])
-        combadd(t2,t1,t0,[x+16],[y+16])
-        combadd(t2,t1,t0,[x+24],[y+8])
-        mov     [z+32], t0
-// Result term 5: add in all x[i] * y[j] with i + j = 5
-        xor     t0, t0
-        combadz(t0,t2,t1,[x+16],[y+24])
-        combadd(t0,t2,t1,[x+24],[y+16])
-        mov     [z+40], t1
-// Result term 6: add in x[3] * y[3]
-        xor     t1, t1                  // NOTE(review): t1 is never read after this, so this looks redundant
-        combads(t0,t2,[x+24],[y+24])
-        mov     [z+48], t2
-// Result term 7: the remaining high word
-        mov     [z+56], t0
-// Return
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
deleted file mode 100644
index 066403b074..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S
+++ /dev/null
@@ -1,244 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Multiply z := x * y
-// Inputs x[8], y[8]; output z[16]
-//
-//    extern void bignum_mul_8_16_alt
-//     (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]);
-//
-// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
-// Microsoft x64 ABI:   RCX = z, RDX = x, R8 = y
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_8_16_alt)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_8_16_alt)
-        .text
-// These arguments are used in the registers they arrive in
-#define z rdi
-#define x rsi
-// This is moved from rdx to free it for muls
-#define y rcx
-// Other variables used as a rotating 3-word window to add terms to
-#define t0 r8
-#define t1 r9
-#define t2 r10
-// Macro for the key "multiply and add to (c,h,l)" step
-#define combadd(c,h,l,numa,numb)                \
-        mov     rax, numa;                      \
-        mul     QWORD PTR numb;                 \
-        add     l, rax;                         \
-        adc     h, rdx;                         \
-        adc     c, 0
-// A minutely shorter form for when c = 0 initially (adc c, c then leaves just the carry)
-#define combadz(c,h,l,numa,numb)                \
-        mov     rax, numa;                      \
-        mul     QWORD PTR numb;                 \
-        add     l, rax;                         \
-        adc     h, rdx;                         \
-        adc     c, c
-// A short form where we don't expect a top carry, so no third word is updated
-#define combads(h,l,numa,numb)                  \
-        mov     rax, numa;                      \
-        mul     QWORD PTR numb;                 \
-        add     l, rax;                         \
-        adc     h, rdx
-S2N_BN_SYMBOL(bignum_mul_8_16_alt):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-        mov     rsi, rdx
-        mov     rdx, r8
-#endif
-// Copy y into a safe register to start with
-        mov     y, rdx
-// Result term 0: x[0] * y[0]
-        mov     rax, [x]
-        mul     QWORD PTR [y]
-        mov     [z], rax
-        mov     t0, rdx
-        xor     t1, t1
-// Result term 1: add in all x[i] * y[j] with i + j = 1
-        xor     t2, t2
-        combads(t1,t0,[x],[y+8])
-        combadz(t2,t1,t0,[x+8],[y])
-        mov     [z+8], t0
-// Result term 2: add in all x[i] * y[j] with i + j = 2
-        xor     t0, t0
-        combadz(t0,t2,t1,[x],[y+16])
-        combadd(t0,t2,t1,[x+8],[y+8])
-        combadd(t0,t2,t1,[x+16],[y])
-        mov     [z+16], t1
-// Result term 3: add in all x[i] * y[j] with i + j = 3
-        xor     t1, t1
-        combadz(t1,t0,t2,[x],[y+24])
-        combadd(t1,t0,t2,[x+8],[y+16])
-        combadd(t1,t0,t2,[x+16],[y+8])
-        combadd(t1,t0,t2,[x+24],[y])
-        mov     [z+24], t2
-// Result term 4: add in all x[i] * y[j] with i + j = 4
-        xor     t2, t2
-        combadz(t2,t1,t0,[x],[y+32])
-        combadd(t2,t1,t0,[x+8],[y+24])
-        combadd(t2,t1,t0,[x+16],[y+16])
-        combadd(t2,t1,t0,[x+24],[y+8])
-        combadd(t2,t1,t0,[x+32],[y])
-        mov     [z+32], t0
-// Result term 5: add in all x[i] * y[j] with i + j = 5
-        xor     t0, t0
-        combadz(t0,t2,t1,[x],[y+40])
-        combadd(t0,t2,t1,[x+8],[y+32])
-        combadd(t0,t2,t1,[x+16],[y+24])
-        combadd(t0,t2,t1,[x+24],[y+16])
-        combadd(t0,t2,t1,[x+32],[y+8])
-        combadd(t0,t2,t1,[x+40],[y])
-        mov     [z+40], t1
-// Result term 6: add in all x[i] * y[j] with i + j = 6
-        xor     t1, t1
-        combadz(t1,t0,t2,[x],[y+48])
-        combadd(t1,t0,t2,[x+8],[y+40])
-        combadd(t1,t0,t2,[x+16],[y+32])
-        combadd(t1,t0,t2,[x+24],[y+24])
-        combadd(t1,t0,t2,[x+32],[y+16])
-        combadd(t1,t0,t2,[x+40],[y+8])
-        combadd(t1,t0,t2,[x+48],[y])
-        mov     [z+48], t2
-// Result term 7: add in all x[i] * y[j] with i + j = 7
-        xor     t2, t2
-        combadz(t2,t1,t0,[x],[y+56])
-        combadd(t2,t1,t0,[x+8],[y+48])
-        combadd(t2,t1,t0,[x+16],[y+40])
-        combadd(t2,t1,t0,[x+24],[y+32])
-        combadd(t2,t1,t0,[x+32],[y+24])
-        combadd(t2,t1,t0,[x+40],[y+16])
-        combadd(t2,t1,t0,[x+48],[y+8])
-        combadd(t2,t1,t0,[x+56],[y])
-        mov     [z+56], t0
-// Result term 8: add in all x[i] * y[j] with i + j = 8
-        xor     t0, t0
-        combadz(t0,t2,t1,[x+8],[y+56])
-        combadd(t0,t2,t1,[x+16],[y+48])
-        combadd(t0,t2,t1,[x+24],[y+40])
-        combadd(t0,t2,t1,[x+32],[y+32])
-        combadd(t0,t2,t1,[x+40],[y+24])
-        combadd(t0,t2,t1,[x+48],[y+16])
-        combadd(t0,t2,t1,[x+56],[y+8])
-        mov     [z+64], t1
-// Result term 9: add in all x[i] * y[j] with i + j = 9
-        xor     t1, t1
-        combadz(t1,t0,t2,[x+16],[y+56])
-        combadd(t1,t0,t2,[x+24],[y+48])
-        combadd(t1,t0,t2,[x+32],[y+40])
-        combadd(t1,t0,t2,[x+40],[y+32])
-        combadd(t1,t0,t2,[x+48],[y+24])
-        combadd(t1,t0,t2,[x+56],[y+16])
-        mov     [z+72], t2
-// Result term 10: add in all x[i] * y[j] with i + j = 10
-        xor     t2, t2
-        combadz(t2,t1,t0,[x+24],[y+56])
-        combadd(t2,t1,t0,[x+32],[y+48])
-        combadd(t2,t1,t0,[x+40],[y+40])
-        combadd(t2,t1,t0,[x+48],[y+32])
-        combadd(t2,t1,t0,[x+56],[y+24])
-        mov     [z+80], t0
-// Result term 11: add in all x[i] * y[j] with i + j = 11
-        xor     t0, t0
-        combadz(t0,t2,t1,[x+32],[y+56])
-        combadd(t0,t2,t1,[x+40],[y+48])
-        combadd(t0,t2,t1,[x+48],[y+40])
-        combadd(t0,t2,t1,[x+56],[y+32])
-        mov     [z+88], t1
-// Result term 12: add in all x[i] * y[j] with i + j = 12
-        xor     t1, t1
-        combadz(t1,t0,t2,[x+40],[y+56])
-        combadd(t1,t0,t2,[x+48],[y+48])
-        combadd(t1,t0,t2,[x+56],[y+40])
-        mov     [z+96], t2
-// Result term 13: add in all x[i] * y[j] with i + j = 13
-        xor     t2, t2
-        combadz(t2,t1,t0,[x+48],[y+56])
-        combadd(t2,t1,t0,[x+56],[y+48])
-        mov     [z+104], t0
-// Result term 14: add in x[7] * y[7]
-        combads(t2,t1,[x+56],[y+56])
-        mov     [z+112], t1
-// Result term 15: the remaining high word
-        mov     [z+120], t2
-// Return
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
deleted file mode 100644
index 54e3f59442..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S
+++ /dev/null
@@ -1,197 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Square z := x^2
-// Input x[n]; output z[k]
-//
-//    extern void bignum_sqr
-//     (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x);
-//
-// Does the "z := x^2" operation where x is n digits and result z is k.
-// Truncates the result in general unless k >= 2 * n
-//
-// Standard x86-64 ABI: RDI = k, RSI = z, RDX = n, RCX = x
-// Microsoft x64 ABI:   RCX = k, RDX = z, R8 = n, R9 = x
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr)
-        .text
-// p (the k of the prototype), z and x stay in their argument registers; n is moved from rdx to r8.
-#define p rdi
-#define z rsi
-#define x rcx
-#define n r8
-// These are always local scratch since multiplier result is in these
-#define a rax
-#define d rdx
-// Other variables
-#define i rbx
-#define ll rbp
-#define hh r9
-#define k r10
-#define y r11
-#define htop r12
-#define l r13
-#define h r14
-#define c r15
-// Short versions
-#define llshort ebp
-S2N_BN_SYMBOL(bignum_sqr):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-        mov     rsi, rdx
-        mov     rdx, r8
-        mov     rcx, r9
-#endif
-// We use too many registers, and also we need rax:rdx for multiplications
-        push    rbx
-        push    rbp
-        push    r12
-        push    r13
-        push    r14
-        push    r15
-        mov     n, rdx
-// If p = 0 the result is trivial and nothing needs doing
-        test    p, p
-        jz      end
-// initialize (hh,ll) = 0
-        xor     llshort, llshort
-        xor     hh, hh
-// Iterate outer loop from k = 0 ... k = p - 1 producing result digits
-        xor     k, k
-outerloop:
-// First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n
-// We want to accumulate all x[i] * x[k - i] for bot <= i < top
-// For the optimization of squaring we avoid duplication and do
-// 2 * x[i] * x[k - i] for i < htop, where htop = MIN ((k+1)/2) n
-// Initialize i = bot; in fact just compute bot as i directly.
-        xor     c, c
-        lea     i, [k+1]
-        mov     htop, i
-        shr     htop, 1
-        sub     i, n
-        cmovc   i, c
-        cmp     htop, n
-        cmovnc  htop, n
-// Initialize the three-part local sum (c,h,l); c was already done above
-        xor     l, l
-        xor     h, h
-// If htop <= bot then main doubled part of the sum is empty
-        cmp     i, htop
-        jnc     nosumming
-// Use a moving pointer for [y] = x[k-i] for the cofactor
-        mov     a, k
-        sub     a, i
-        lea     y, [x+8*a]
-// Do the main part of the sum x[i] * x[k - i] for 2 * i < k
-innerloop:
-        mov     a, [x+8*i]
-        mul     QWORD PTR [y]
-        add     l, a
-        adc     h, d
-        adc     c, 0
-        sub     y, 8
-        inc     i
-        cmp     i, htop
-        jc      innerloop
-// Now double it
-        add     l, l
-        adc     h, h
-        adc     c, c
-// If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term
-nosumming:
-        test    k, 1
-        jnz     innerend
-        cmp     i, n
-        jnc     innerend
-        mov     a, [x+8*i]
-        mul     a
-        add     l, a
-        adc     h, d
-        adc     c, 0
-// Now add the local sum into the global sum, store and shift
-innerend:
-        add     l, ll
-        mov     [z+8*k], l
-        adc     h, hh
-        mov     ll, h
-        adc     c, 0
-        mov     hh, c
-        inc     k
-        cmp     k, p
-        jc      outerloop
-// Restore registers and return
-end:
-        pop     r15
-        pop     r14
-        pop     r13
-        pop     r12
-        pop     rbp
-        pop     rbx
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
deleted file mode 100644
index 7c534ae907..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S
+++ /dev/null
@@ -1,145 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Square, z := x^2
-// Input x[4]; output z[8]
-//
-//    extern void bignum_sqr_4_8_alt
-//      (uint64_t z[static 8], uint64_t x[static 4]);
-//
-// Standard x86-64 ABI: RDI = z, RSI = x
-// Microsoft x64 ABI:   RCX = z, RDX = x
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_4_8_alt)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_4_8_alt)
-        .text
-// Input arguments
-#define z rdi
-#define x rsi
-// Other variables used as a rotating 3-word window to add terms to
-#define t0 rcx
-#define t1 r8
-#define t2 r9
-// Macro for the key "multiply and add to (c,h,l)" step, for square term
-#define combadd1(c,h,l,numa)                    \
-        mov     rax, numa;                      \
-        mul     rax;                            \
-        add     l, rax;                         \
-        adc     h, rdx;                         \
-        adc     c, 0
-// A short form where we don't expect a top carry
-#define combads(h,l,numa)                       \
-        mov     rax, numa;                      \
-        mul     rax;                            \
-        add     l, rax;                         \
-        adc     h, rdx
-// A version doubling before adding, for non-square terms
-#define combadd2(c,h,l,numa,numb)               \
-        mov     rax, numa;                      \
-        mul     QWORD PTR numb;                 \
-        add     rax, rax;                       \
-        adc     rdx, rdx;                       \
-        adc     c, 0;                           \
-        add     l, rax;                         \
-        adc     h, rdx;                         \
-        adc     c, 0
-S2N_BN_SYMBOL(bignum_sqr_4_8_alt):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-        mov     rsi, rdx
-#endif
-// Result term 0
-        mov     rax, [x]
-        mul     rax
-        mov     [z], rax
-        mov     t0, rdx
-        xor     t1, t1
-// Result term 1
-       xor     t2, t2
-       combadd2(t2,t1,t0,[x],[x+8])
-       mov     [z+8], t0
-// Result term 2
-        xor     t0, t0
-        combadd1(t0,t2,t1,[x+8])
-        combadd2(t0,t2,t1,[x],[x+16])
-        mov     [z+16], t1
-// Result term 3
-        xor     t1, t1
-        combadd2(t1,t0,t2,[x],[x+24])
-        combadd2(t1,t0,t2,[x+8],[x+16])
-        mov     [z+24], t2
-// Result term 4
-        xor     t2, t2
-        combadd2(t2,t1,t0,[x+8],[x+24])
-        combadd1(t2,t1,t0,[x+16])
-        mov     [z+32], t0
-// Result term 5
-        xor     t0, t0
-        combadd2(t0,t2,t1,[x+16],[x+24])
-        mov     [z+40], t1
-// Result term 6
-        xor     t1, t1
-        combads(t0,t2,[x+24])
-        mov     [z+48], t2
-// Result term 7
-        mov     [z+56], t0
-// Return
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
deleted file mode 100644
index ac0b6f96c2..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S
+++ /dev/null
@@ -1,242 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Square, z := x^2
-// Input x[8]; output z[16]
-//
-//    extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]);
-//
-// Standard x86-64 ABI: RDI = z, RSI = x
-// Microsoft x64 ABI:   RCX = z, RDX = x
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_8_16_alt)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_8_16_alt)
-        .text
-// Input arguments
-#define z rdi
-#define x rsi
-// Other variables used as a rotating 3-word window to add terms to
-#define t0 r8
-#define t1 r9
-#define t2 r10
-// Additional temporaries for local windows to share doublings
-#define u0 rcx
-#define u1 r11
-// Macro for the key "multiply and add to (c,h,l)" step
-#define combadd(c,h,l,numa,numb)                \
-        mov     rax, numa;                      \
-        mul     QWORD PTR numb;                 \
-        add     l, rax;                         \
-        adc     h, rdx;                         \
-        adc     c, 0
-// Set up initial window (c,h,l) = numa * numb
-#define combaddz(c,h,l,numa,numb)               \
-        mov     rax, numa;                      \
-        mul     QWORD PTR numb;                 \
-        xor     c, c;                           \
-        mov     l, rax;                         \
-        mov     h, rdx
-// Doubling step (c,h,l) = 2 * (c,hh,ll) + (0,h,l)
-#define doubladd(c,h,l,hh,ll)                   \
-        add     ll, ll;                         \
-        adc     hh, hh;                         \
-        adc     c, c;                           \
-        add     l, ll;                          \
-        adc     h, hh;                          \
-        adc     c, 0
-// Square term incorporation (c,h,l) += numa^2
-#define combadd1(c,h,l,numa)                    \
-        mov     rax, numa;                      \
-        mul     rax;                            \
-        add     l, rax;                         \
-        adc     h, rdx;                         \
-        adc     c, 0
-// A short form where we don't expect a top carry
-#define combads(h,l,numa)                       \
-        mov     rax, numa;                      \
-        mul     rax;                            \
-        add     l, rax;                         \
-        adc     h, rdx
-// A version doubling directly before adding, for single non-square terms
-#define combadd2(c,h,l,numa,numb)               \
-        mov     rax, numa;                      \
-        mul     QWORD PTR numb;                 \
-        add     rax, rax;                       \
-        adc     rdx, rdx;                       \
-        adc     c, 0;                           \
-        add     l, rax;                         \
-        adc     h, rdx;                         \
-        adc     c, 0
-S2N_BN_SYMBOL(bignum_sqr_8_16_alt):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-        mov     rsi, rdx
-#endif
-// Result term 0
-        mov     rax, [x]
-        mul     rax
-        mov     [z], rax
-        mov     t0, rdx
-        xor     t1, t1
-// Result term 1
-        xor     t2, t2
-        combadd2(t2,t1,t0,[x],[x+8])
-        mov     [z+8], t0
-// Result term 2
-        xor     t0, t0
-        combadd1(t0,t2,t1,[x+8])
-        combadd2(t0,t2,t1,[x],[x+16])
-        mov     [z+16], t1
-// Result term 3
-        combaddz(t1,u1,u0,[x],[x+24])
-        combadd(t1,u1,u0,[x+8],[x+16])
-        doubladd(t1,t0,t2,u1,u0)
-        mov     [z+24], t2
-// Result term 4
-        combaddz(t2,u1,u0,[x],[x+32])
-        combadd(t2,u1,u0,[x+8],[x+24])
-        doubladd(t2,t1,t0,u1,u0)
-        combadd1(t2,t1,t0,[x+16])
-        mov     [z+32], t0
-// Result term 5
-        combaddz(t0,u1,u0,[x],[x+40])
-        combadd(t0,u1,u0,[x+8],[x+32])
-        combadd(t0,u1,u0,[x+16],[x+24])
-        doubladd(t0,t2,t1,u1,u0)
-        mov     [z+40], t1
-// Result term 6
-        combaddz(t1,u1,u0,[x],[x+48])
-        combadd(t1,u1,u0,[x+8],[x+40])
-        combadd(t1,u1,u0,[x+16],[x+32])
-        doubladd(t1,t0,t2,u1,u0)
-        combadd1(t1,t0,t2,[x+24])
-        mov     [z+48], t2
-// Result term 7
-        combaddz(t2,u1,u0,[x],[x+56])
-        combadd(t2,u1,u0,[x+8],[x+48])
-        combadd(t2,u1,u0,[x+16],[x+40])
-        combadd(t2,u1,u0,[x+24],[x+32])
-        doubladd(t2,t1,t0,u1,u0)
-        mov     [z+56], t0
-// Result term 8
-        combaddz(t0,u1,u0,[x+8],[x+56])
-        combadd(t0,u1,u0,[x+16],[x+48])
-        combadd(t0,u1,u0,[x+24],[x+40])
-        doubladd(t0,t2,t1,u1,u0)
-        combadd1(t0,t2,t1,[x+32])
-        mov     [z+64], t1
-// Result term 9
-        combaddz(t1,u1,u0,[x+16],[x+56])
-        combadd(t1,u1,u0,[x+24],[x+48])
-        combadd(t1,u1,u0,[x+32],[x+40])
-        doubladd(t1,t0,t2,u1,u0)
-        mov     [z+72], t2
-// Result term 10
-        combaddz(t2,u1,u0,[x+24],[x+56])
-        combadd(t2,u1,u0,[x+32],[x+48])
-        doubladd(t2,t1,t0,u1,u0)
-        combadd1(t2,t1,t0,[x+40])
-        mov     [z+80], t0
-// Result term 11
-        combaddz(t0,u1,u0,[x+32],[x+56])
-        combadd(t0,u1,u0,[x+40],[x+48])
-        doubladd(t0,t2,t1,u1,u0)
-        mov     [z+88], t1
-// Result term 12
-        xor     t1, t1
-        combadd2(t1,t0,t2,[x+40],[x+56])
-        combadd1(t1,t0,t2,[x+48])
-        mov     [z+96], t2
-// Result term 13
-        xor     t2, t2
-        combadd2(t2,t1,t0,[x+48],[x+56])
-        mov     [z+104], t0
-// Result term 14
-        combads(t2,t1,[x+56])
-        mov     [z+112], t1
-// Result term 15
-        mov     [z+120], t2
-// Return
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
deleted file mode 100644
index 3ff8a30510..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S
+++ /dev/null
@@ -1,153 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Subtract, z := x - y
-// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
-//
-//    extern uint64_t bignum_sub
-//     (uint64_t p, uint64_t *z,
-//      uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
-//
-// Does the z := x - y operation, truncating modulo p words in general and
-// returning a top borrow (0 or 1) in the p'th place, only subtracting input
-// words below p (as well as m and n respectively) to get the diff and borrow.
-//
-// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
-// Microsoft x64 ABI:   RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub)
-        .text
-#define p rdi
-#define z rsi
-#define m rdx
-#define x rcx
-#define n r8
-#define y r9
-#define i r10
-#define a rax
-#define ashort eax
-S2N_BN_SYMBOL(bignum_sub):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-        mov     rsi, rdx
-        mov     rdx, r8
-        mov     rcx, r9
-        mov     r8, [rsp+56]
-        mov     r9, [rsp+64]
-#endif
-// Zero the main index counter for both branches
-        xor     i, i
-// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
-// we'll never need words past the p'th. Can now assume m <= p and n <= p.
-// Then compare the modified m and n and branch accordingly
-        cmp     p, m
-        cmovc   m, p
-        cmp     p, n
-        cmovc   n, p
-        cmp     m, n
-        jc      ylonger
-// The case where x is longer or of the same size (p >= m >= n)
-        sub     p, m
-        sub     m, n
-        inc     m
-        test    n, n
-        jz      xtest
-xmainloop:
-        mov     a, [x+8*i]
-        sbb     a, [y+8*i]
-        mov     [z+8*i],a
-        inc     i
-        dec     n
-        jnz     xmainloop
-        jmp     xtest
-xtoploop:
-        mov     a, [x+8*i]
-        sbb     a, 0
-        mov     [z+8*i],a
-        inc     i
-xtest:
-        dec     m
-        jnz     xtoploop
-        sbb     a, a
-        test    p, p
-        jz      tailskip
-tailloop:
-        mov     [z+8*i],a
-        inc     i
-        dec     p
-        jnz     tailloop
-tailskip:
-        neg     a
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-// The case where y is longer (p >= n > m)
-ylonger:
-        sub     p, n
-        sub     n, m
-        test    m, m
-        jz      ytoploop
-ymainloop:
-        mov     a, [x+8*i]
-        sbb     a, [y+8*i]
-        mov     [z+8*i],a
-        inc     i
-        dec     m
-        jnz     ymainloop
-ytoploop:
-        mov     ashort, 0
-        sbb     a, [y+8*i]
-        mov     [z+8*i],a
-        inc     i
-        dec     n
-        jnz     ytoploop
-        sbb     a, a
-        test    p, p
-        jnz     tailloop
-        neg     a
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c b/src/lib/libcrypto/bn/arch/amd64/bn_arch.c
deleted file mode 100644
index a377a05681..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*      $OpenBSD: bn_arch.c,v 1.7 2023/06/24 16:01:44 jsing Exp $ */
-/*
- * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#include <openssl/bn.h>
-#include "bn_arch.h"
-#include "bn_local.h"
-#include "s2n_bignum.h"
-#ifdef HAVE_BN_ADD
-BN_ULONG
-bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
-    int b_len)
-{
-        return bignum_add(r_len, (uint64_t *)r, a_len, (uint64_t *)a,
-            b_len, (uint64_t *)b);
-}
-#endif
-#ifdef HAVE_BN_ADD_WORDS
-BN_ULONG
-bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
-{
-        return bignum_add(n, (uint64_t *)rd, n, (uint64_t *)ad, n,
-            (uint64_t *)bd);
-}
-#endif
-#ifdef HAVE_BN_SUB
-BN_ULONG
-bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
-    int b_len)
-{
-        return bignum_sub(r_len, (uint64_t *)r, a_len, (uint64_t *)a,
-            b_len, (uint64_t *)b);
-}
-#endif
-#ifdef HAVE_BN_SUB_WORDS
-BN_ULONG
-bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
-{
-        return bignum_sub(n, (uint64_t *)rd, n, (uint64_t *)ad, n,
-            (uint64_t *)bd);
-}
-#endif
-#ifdef HAVE_BN_MUL_ADD_WORDS
-BN_ULONG
-bn_mul_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
-{
-        return bignum_cmadd(num, (uint64_t *)rd, w, num, (uint64_t *)ad);
-}
-#endif
-#ifdef HAVE_BN_MUL_WORDS
-BN_ULONG
-bn_mul_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
-{
-        return bignum_cmul(num, (uint64_t *)rd, w, num, (uint64_t *)ad);
-}
-#endif
-#ifdef HAVE_BN_MUL_COMBA4
-void
-bn_mul_comba4(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd)
-{
-        /* XXX - consider using non-alt on CPUs that have the ADX extension. */
-        bignum_mul_4_8_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd);
-}
-#endif
-#ifdef HAVE_BN_MUL_COMBA8
-void
-bn_mul_comba8(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd)
-{
-        /* XXX - consider using non-alt on CPUs that have the ADX extension. */
-        bignum_mul_8_16_alt((uint64_t *)rd, (uint64_t *)ad, (uint64_t *)bd);
-}
-#endif
-#ifdef HAVE_BN_SQR
-int
-bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx)
-{
-        bignum_sqr(r_len, (uint64_t *)r->d, a->top, (uint64_t *)a->d);
-        return 1;
-}
-#endif
-#ifdef HAVE_BN_SQR_COMBA4
-void
-bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad)
-{
-        /* XXX - consider using non-alt on CPUs that have the ADX extension. */
-        bignum_sqr_4_8_alt((uint64_t *)rd, (uint64_t *)ad);
-}
-#endif
-#ifdef HAVE_BN_SQR_COMBA8
-void
-bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad)
-{
-        /* XXX - consider using non-alt on CPUs that have the ADX extension. */
-        bignum_sqr_8_16_alt((uint64_t *)rd, (uint64_t *)ad);
-}
-#endif
-#ifdef HAVE_BN_WORD_CLZ
-int
-bn_word_clz(BN_ULONG w)
-{
-        return word_clz(w);
-}
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
deleted file mode 100644
index 927cd75208..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*      $OpenBSD: bn_arch.h,v 1.14 2024/03/26 06:09:25 jsing Exp $ */
-/*
- * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#include <openssl/bn.h>
-#ifndef HEADER_BN_ARCH_H
-#define HEADER_BN_ARCH_H
-#ifndef OPENSSL_NO_ASM
-#define HAVE_BN_ADD
-#define HAVE_BN_ADD_WORDS
-#define HAVE_BN_DIV_WORDS
-#define HAVE_BN_MUL_ADD_WORDS
-#define HAVE_BN_MUL_COMBA4
-#define HAVE_BN_MUL_COMBA8
-#define HAVE_BN_MUL_WORDS
-#define HAVE_BN_SQR
-#define HAVE_BN_SQR_COMBA4
-#define HAVE_BN_SQR_COMBA8
-#define HAVE_BN_SUB
-#define HAVE_BN_SUB_WORDS
-#define HAVE_BN_WORD_CLZ
-#if defined(__GNUC__)
-#define HAVE_BN_DIV_REM_WORDS_INLINE
-static inline void
-bn_div_rem_words_inline(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q,
-    BN_ULONG *out_r)
-{
-        BN_ULONG q, r;
-        /*
-         * Unsigned division of %rdx:%rax by d with quotient being stored in
-         * %rax and remainder in %rdx.
-         */
-        __asm__ volatile ("divq %4"
-            : "=a"(q), "=d"(r)
-            : "d"(h), "a"(l), "rm"(d)
-            : "cc");
-        *out_q = q;
-        *out_r = r;
-}
-#define HAVE_BN_MULW
-static inline void
-bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
-{
-        BN_ULONG r1, r0;
-        /*
-         * Unsigned multiplication of %rax by b, with the double word result
-         * being stored in %rdx:%rax.
-         */
-        __asm__ ("mulq %3"
-            : "=d"(r1), "=a"(r0)
-            : "a"(a), "rm"(b)
-            : "cc");
-        *out_r1 = r1;
-        *out_r0 = r0;
-}
-#define HAVE_BN_SUBW
-static inline void
-bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
-{
-        BN_ULONG borrow, r0;
-        __asm__ (
-            "subq   %3, %1 \n"
-            "setb   %b0 \n"
-            "and    $1, %0 \n"
-            : "=r"(borrow), "=r"(r0)
-            : "1"(a), "rm"(b)
-            : "cc");
-        *out_borrow = borrow;
-        *out_r0 = r0;
-}
-#endif /* __GNUC__ */
-#endif
-#endif
diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S
deleted file mode 100644
index 3926fcd4b0..0000000000
--- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// ----------------------------------------------------------------------------
-// Count leading zero bits in a single word
-// Input a; output function return
-//
-//    extern uint64_t word_clz (uint64_t a);
-//
-// Standard x86-64 ABI: RDI = a, returns RAX
-// Microsoft x64 ABI:   RCX = a, returns RAX
-// ----------------------------------------------------------------------------
-#include "s2n_bignum_internal.h"
-        .intel_syntax noprefix
-        S2N_BN_SYM_VISIBILITY_DIRECTIVE(word_clz)
-        S2N_BN_SYM_PRIVACY_DIRECTIVE(word_clz)
-        .text
-S2N_BN_SYMBOL(word_clz):
-        _CET_ENDBR
-#if WINDOWS_ABI
-        push    rdi
-        push    rsi
-        mov     rdi, rcx
-#endif
-// First do rax = 63 - bsr(a), which is right except (maybe) for zero inputs
-        bsr     rax, rdi
-        xor     rax, 63
-// Force return of 64 in the zero-input case
-        mov     edx, 64
-        test    rdi, rdi
-        cmove   rax, rdx
-#if WINDOWS_ABI
-        pop    rsi
-        pop    rdi
-#endif
-        ret
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif