diff options
| author | jsing <> | 2025-08-11 14:11:20 +0000 |
|---|---|---|
| committer | jsing <> | 2025-08-11 14:11:20 +0000 |
| commit | 28d52ec2924676a240d0477f564160bd054d5549 (patch) | |
| tree | 22e2b3da2203c9ba206a85da5dd6bf7a67c0a1e8 /src | |
| parent | 4fd25eed849a1e1cc5f0497cf86579813203b406 (diff) | |
| download | openbsd-28d52ec2924676a240d0477f564160bd054d5549.tar.gz openbsd-28d52ec2924676a240d0477f564160bd054d5549.tar.bz2 openbsd-28d52ec2924676a240d0477f564160bd054d5549.zip | |
Resync s2n-bignum primitives for amd64 with upstream.
This amounts to whitespace changes and label renaming.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_add.S | 49 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S | 30 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S | 26 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_mul.S | 23 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S | 6 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S | 7 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S | 27 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S | 6 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S | 5 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sub.S | 45 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/word_clz.S | 4 |
11 files changed, 113 insertions, 115 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S index 5fe4aae7a1..5ec0e36282 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S | |||
| @@ -16,9 +16,8 @@ | |||
| 16 | // Add, z := x + y | 16 | // Add, z := x + y |
| 17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
| 18 | // | 18 | // |
| 19 | // extern uint64_t bignum_add | 19 | // extern uint64_t bignum_add(uint64_t p, uint64_t *z, uint64_t m, |
| 20 | // (uint64_t p, uint64_t *z, | 20 | // const uint64_t *x, uint64_t n, const uint64_t *y); |
| 21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
| 22 | // | 21 | // |
| 23 | // Does the z := x + y operation, truncating modulo p words in general and | 22 | // Does the z := x + y operation, truncating modulo p words in general and |
| 24 | // returning a top carry (0 or 1) in the p'th place, only adding the input | 23 | // returning a top carry (0 or 1) in the p'th place, only adding the input |
| @@ -49,7 +48,7 @@ | |||
| 49 | 48 | ||
| 50 | 49 | ||
| 51 | S2N_BN_SYMBOL(bignum_add): | 50 | S2N_BN_SYMBOL(bignum_add): |
| 52 | _CET_ENDBR | 51 | _CET_ENDBR |
| 53 | 52 | ||
| 54 | #if WINDOWS_ABI | 53 | #if WINDOWS_ABI |
| 55 | push rdi | 54 | push rdi |
| @@ -75,7 +74,7 @@ S2N_BN_SYMBOL(bignum_add): | |||
| 75 | cmp p, n | 74 | cmp p, n |
| 76 | cmovc n, p | 75 | cmovc n, p |
| 77 | cmp m, n | 76 | cmp m, n |
| 78 | jc ylonger | 77 | jc bignum_add_ylonger |
| 79 | 78 | ||
| 80 | // The case where x is longer or of the same size (p >= m >= n) | 79 | // The case where x is longer or of the same size (p >= m >= n) |
| 81 | 80 | ||
| @@ -83,27 +82,27 @@ S2N_BN_SYMBOL(bignum_add): | |||
| 83 | sub m, n | 82 | sub m, n |
| 84 | inc m | 83 | inc m |
| 85 | test n, n | 84 | test n, n |
| 86 | jz xtest | 85 | jz bignum_add_xtest |
| 87 | xmainloop: | 86 | bignum_add_xmainloop: |
| 88 | mov a, [x+8*i] | 87 | mov a, [x+8*i] |
| 89 | adc a, [y+8*i] | 88 | adc a, [y+8*i] |
| 90 | mov [z+8*i],a | 89 | mov [z+8*i],a |
| 91 | inc i | 90 | inc i |
| 92 | dec n | 91 | dec n |
| 93 | jnz xmainloop | 92 | jnz bignum_add_xmainloop |
| 94 | jmp xtest | 93 | jmp bignum_add_xtest |
| 95 | xtoploop: | 94 | bignum_add_xtoploop: |
| 96 | mov a, [x+8*i] | 95 | mov a, [x+8*i] |
| 97 | adc a, 0 | 96 | adc a, 0 |
| 98 | mov [z+8*i],a | 97 | mov [z+8*i],a |
| 99 | inc i | 98 | inc i |
| 100 | xtest: | 99 | bignum_add_xtest: |
| 101 | dec m | 100 | dec m |
| 102 | jnz xtoploop | 101 | jnz bignum_add_xtoploop |
| 103 | mov ashort, 0 | 102 | mov ashort, 0 |
| 104 | adc a, 0 | 103 | adc a, 0 |
| 105 | test p, p | 104 | test p, p |
| 106 | jnz tails | 105 | jnz bignum_add_tails |
| 107 | #if WINDOWS_ABI | 106 | #if WINDOWS_ABI |
| 108 | pop rsi | 107 | pop rsi |
| 109 | pop rdi | 108 | pop rdi |
| @@ -112,30 +111,30 @@ xtest: | |||
| 112 | 111 | ||
| 113 | // The case where y is longer (p >= n > m) | 112 | // The case where y is longer (p >= n > m) |
| 114 | 113 | ||
| 115 | ylonger: | 114 | bignum_add_ylonger: |
| 116 | 115 | ||
| 117 | sub p, n | 116 | sub p, n |
| 118 | sub n, m | 117 | sub n, m |
| 119 | test m, m | 118 | test m, m |
| 120 | jz ytoploop | 119 | jz bignum_add_ytoploop |
| 121 | ymainloop: | 120 | bignum_add_ymainloop: |
| 122 | mov a, [x+8*i] | 121 | mov a, [x+8*i] |
| 123 | adc a, [y+8*i] | 122 | adc a, [y+8*i] |
| 124 | mov [z+8*i],a | 123 | mov [z+8*i],a |
| 125 | inc i | 124 | inc i |
| 126 | dec m | 125 | dec m |
| 127 | jnz ymainloop | 126 | jnz bignum_add_ymainloop |
| 128 | ytoploop: | 127 | bignum_add_ytoploop: |
| 129 | mov a, [y+8*i] | 128 | mov a, [y+8*i] |
| 130 | adc a, 0 | 129 | adc a, 0 |
| 131 | mov [z+8*i],a | 130 | mov [z+8*i],a |
| 132 | inc i | 131 | inc i |
| 133 | dec n | 132 | dec n |
| 134 | jnz ytoploop | 133 | jnz bignum_add_ytoploop |
| 135 | mov ashort, 0 | 134 | mov ashort, 0 |
| 136 | adc a, 0 | 135 | adc a, 0 |
| 137 | test p, p | 136 | test p, p |
| 138 | jnz tails | 137 | jnz bignum_add_tails |
| 139 | #if WINDOWS_ABI | 138 | #if WINDOWS_ABI |
| 140 | pop rsi | 139 | pop rsi |
| 141 | pop rdi | 140 | pop rdi |
| @@ -144,16 +143,16 @@ ytoploop: | |||
| 144 | 143 | ||
| 145 | // Adding a non-trivial tail, when p > max(m,n) | 144 | // Adding a non-trivial tail, when p > max(m,n) |
| 146 | 145 | ||
| 147 | tails: | 146 | bignum_add_tails: |
| 148 | mov [z+8*i],a | 147 | mov [z+8*i],a |
| 149 | xor a, a | 148 | xor a, a |
| 150 | jmp tail | 149 | jmp bignum_add_tail |
| 151 | tailloop: | 150 | bignum_add_tailloop: |
| 152 | mov [z+8*i],a | 151 | mov [z+8*i],a |
| 153 | tail: | 152 | bignum_add_tail: |
| 154 | inc i | 153 | inc i |
| 155 | dec p | 154 | dec p |
| 156 | jnz tailloop | 155 | jnz bignum_add_tailloop |
| 157 | #if WINDOWS_ABI | 156 | #if WINDOWS_ABI |
| 158 | pop rsi | 157 | pop rsi |
| 159 | pop rdi | 158 | pop rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S index 25ba17bce2..ebbacec344 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S | |||
| @@ -16,8 +16,8 @@ | |||
| 16 | // Multiply-add with single-word multiplier, z := z + c * y | 16 | // Multiply-add with single-word multiplier, z := z + c * y |
| 17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
| 18 | // | 18 | // |
| 19 | // extern uint64_t bignum_cmadd | 19 | // extern uint64_t bignum_cmadd(uint64_t k, uint64_t *z, uint64_t c, uint64_t n, |
| 20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 20 | // const uint64_t *y); |
| 21 | // | 21 | // |
| 22 | // Does the "z := z + c * y" operation where y is n digits, result z is p. | 22 | // Does the "z := z + c * y" operation where y is n digits, result z is p. |
| 23 | // Truncates the result in general. | 23 | // Truncates the result in general. |
| @@ -54,7 +54,7 @@ | |||
| 54 | 54 | ||
| 55 | 55 | ||
| 56 | S2N_BN_SYMBOL(bignum_cmadd): | 56 | S2N_BN_SYMBOL(bignum_cmadd): |
| 57 | _CET_ENDBR | 57 | _CET_ENDBR |
| 58 | 58 | ||
| 59 | #if WINDOWS_ABI | 59 | #if WINDOWS_ABI |
| 60 | push rdi | 60 | push rdi |
| @@ -82,7 +82,7 @@ S2N_BN_SYMBOL(bignum_cmadd): | |||
| 82 | 82 | ||
| 83 | xor h, h | 83 | xor h, h |
| 84 | test n, n | 84 | test n, n |
| 85 | jz end | 85 | jz bignum_cmadd_end |
| 86 | 86 | ||
| 87 | // Move c into a safer register as multiplies overwrite rdx | 87 | // Move c into a safer register as multiplies overwrite rdx |
| 88 | 88 | ||
| @@ -96,11 +96,11 @@ S2N_BN_SYMBOL(bignum_cmadd): | |||
| 96 | mov h, rdx | 96 | mov h, rdx |
| 97 | mov ishort, 1 | 97 | mov ishort, 1 |
| 98 | dec n | 98 | dec n |
| 99 | jz hightail | 99 | jz bignum_cmadd_hightail |
| 100 | 100 | ||
| 101 | // Main loop, where we always have CF + previous high part h to add in | 101 | // Main loop, where we always have CF + previous high part h to add in |
| 102 | 102 | ||
| 103 | loop: | 103 | bignum_cmadd_loop: |
| 104 | adc h, [z+8*i] | 104 | adc h, [z+8*i] |
| 105 | sbb r, r | 105 | sbb r, r |
| 106 | mov rax, [x+8*i] | 106 | mov rax, [x+8*i] |
| @@ -111,36 +111,36 @@ loop: | |||
| 111 | mov h, rdx | 111 | mov h, rdx |
| 112 | inc i | 112 | inc i |
| 113 | dec n | 113 | dec n |
| 114 | jnz loop | 114 | jnz bignum_cmadd_loop |
| 115 | 115 | ||
| 116 | hightail: | 116 | bignum_cmadd_hightail: |
| 117 | adc h, 0 | 117 | adc h, 0 |
| 118 | 118 | ||
| 119 | // Propagate the carry all the way to the end with h as extra carry word | 119 | // Propagate the carry all the way to the end with h as extra carry word |
| 120 | 120 | ||
| 121 | tail: | 121 | bignum_cmadd_tail: |
| 122 | test p, p | 122 | test p, p |
| 123 | jz end | 123 | jz bignum_cmadd_end |
| 124 | 124 | ||
| 125 | add [z+8*i], h | 125 | add [z+8*i], h |
| 126 | mov hshort, 0 | 126 | mov hshort, 0 |
| 127 | inc i | 127 | inc i |
| 128 | dec p | 128 | dec p |
| 129 | jz highend | 129 | jz bignum_cmadd_highend |
| 130 | 130 | ||
| 131 | tloop: | 131 | bignum_cmadd_tloop: |
| 132 | adc [z+8*i], h | 132 | adc [z+8*i], h |
| 133 | inc i | 133 | inc i |
| 134 | dec p | 134 | dec p |
| 135 | jnz tloop | 135 | jnz bignum_cmadd_tloop |
| 136 | 136 | ||
| 137 | highend: | 137 | bignum_cmadd_highend: |
| 138 | 138 | ||
| 139 | adc h, 0 | 139 | adc h, 0 |
| 140 | 140 | ||
| 141 | // Return the high/carry word | 141 | // Return the high/carry word |
| 142 | 142 | ||
| 143 | end: | 143 | bignum_cmadd_end: |
| 144 | mov rax, h | 144 | mov rax, h |
| 145 | 145 | ||
| 146 | pop rbx | 146 | pop rbx |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S index 12f785d63a..3e28e37535 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S | |||
| @@ -16,8 +16,8 @@ | |||
| 16 | // Multiply by a single word, z := c * y | 16 | // Multiply by a single word, z := c * y |
| 17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
| 18 | // | 18 | // |
| 19 | // extern uint64_t bignum_cmul | 19 | // extern uint64_t bignum_cmul(uint64_t k, uint64_t *z, uint64_t c, uint64_t n, |
| 20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 20 | // const uint64_t *y); |
| 21 | // | 21 | // |
| 22 | // Does the "z := c * y" operation where y is n digits, result z is p. | 22 | // Does the "z := c * y" operation where y is n digits, result z is p. |
| 23 | // Truncates the result in general unless p >= n + 1. | 23 | // Truncates the result in general unless p >= n + 1. |
| @@ -51,7 +51,7 @@ | |||
| 51 | 51 | ||
| 52 | 52 | ||
| 53 | S2N_BN_SYMBOL(bignum_cmul): | 53 | S2N_BN_SYMBOL(bignum_cmul): |
| 54 | _CET_ENDBR | 54 | _CET_ENDBR |
| 55 | 55 | ||
| 56 | #if WINDOWS_ABI | 56 | #if WINDOWS_ABI |
| 57 | push rdi | 57 | push rdi |
| @@ -76,7 +76,7 @@ S2N_BN_SYMBOL(bignum_cmul): | |||
| 76 | xor h, h | 76 | xor h, h |
| 77 | xor i, i | 77 | xor i, i |
| 78 | test n, n | 78 | test n, n |
| 79 | jz tail | 79 | jz bignum_cmul_tail |
| 80 | 80 | ||
| 81 | // Move c into a safer register as multiplies overwrite rdx | 81 | // Move c into a safer register as multiplies overwrite rdx |
| 82 | 82 | ||
| @@ -90,11 +90,11 @@ S2N_BN_SYMBOL(bignum_cmul): | |||
| 90 | mov h, rdx | 90 | mov h, rdx |
| 91 | inc i | 91 | inc i |
| 92 | cmp i, n | 92 | cmp i, n |
| 93 | jz tail | 93 | jz bignum_cmul_tail |
| 94 | 94 | ||
| 95 | // Main loop doing the multiplications | 95 | // Main loop doing the multiplications |
| 96 | 96 | ||
| 97 | loop: | 97 | bignum_cmul_loop: |
| 98 | mov rax, [x+8*i] | 98 | mov rax, [x+8*i] |
| 99 | mul c | 99 | mul c |
| 100 | add rax, h | 100 | add rax, h |
| @@ -103,28 +103,28 @@ loop: | |||
| 103 | mov h, rdx | 103 | mov h, rdx |
| 104 | inc i | 104 | inc i |
| 105 | cmp i, n | 105 | cmp i, n |
| 106 | jc loop | 106 | jc bignum_cmul_loop |
| 107 | 107 | ||
| 108 | // Add a tail when the destination is longer | 108 | // Add a tail when the destination is longer |
| 109 | 109 | ||
| 110 | tail: | 110 | bignum_cmul_tail: |
| 111 | cmp i, p | 111 | cmp i, p |
| 112 | jnc end | 112 | jnc bignum_cmul_end |
| 113 | mov [z+8*i], h | 113 | mov [z+8*i], h |
| 114 | xor h, h | 114 | xor h, h |
| 115 | inc i | 115 | inc i |
| 116 | cmp i, p | 116 | cmp i, p |
| 117 | jnc end | 117 | jnc bignum_cmul_end |
| 118 | 118 | ||
| 119 | tloop: | 119 | bignum_cmul_tloop: |
| 120 | mov [z+8*i], h | 120 | mov [z+8*i], h |
| 121 | inc i | 121 | inc i |
| 122 | cmp i, p | 122 | cmp i, p |
| 123 | jc tloop | 123 | jc bignum_cmul_tloop |
| 124 | 124 | ||
| 125 | // Return the high/carry word | 125 | // Return the high/carry word |
| 126 | 126 | ||
| 127 | end: | 127 | bignum_cmul_end: |
| 128 | mov rax, h | 128 | mov rax, h |
| 129 | 129 | ||
| 130 | #if WINDOWS_ABI | 130 | #if WINDOWS_ABI |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S index a3552679a2..3bc09de30a 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S | |||
| @@ -16,9 +16,8 @@ | |||
| 16 | // Multiply z := x * y | 16 | // Multiply z := x * y |
| 17 | // Inputs x[m], y[n]; output z[k] | 17 | // Inputs x[m], y[n]; output z[k] |
| 18 | // | 18 | // |
| 19 | // extern void bignum_mul | 19 | // extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, |
| 20 | // (uint64_t k, uint64_t *z, | 20 | // uint64_t n, const uint64_t *y); |
| 21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
| 22 | // | 21 | // |
| 23 | // Does the "z := x * y" operation where x is m digits, y is n, result z is k. | 22 | // Does the "z := x * y" operation where x is m digits, y is n, result z is k. |
| 24 | // Truncates the result in general unless k >= m + n | 23 | // Truncates the result in general unless k >= m + n |
| @@ -59,7 +58,7 @@ | |||
| 59 | 58 | ||
| 60 | 59 | ||
| 61 | S2N_BN_SYMBOL(bignum_mul): | 60 | S2N_BN_SYMBOL(bignum_mul): |
| 62 | _CET_ENDBR | 61 | _CET_ENDBR |
| 63 | 62 | ||
| 64 | #if WINDOWS_ABI | 63 | #if WINDOWS_ABI |
| 65 | push rdi | 64 | push rdi |
| @@ -88,7 +87,7 @@ S2N_BN_SYMBOL(bignum_mul): | |||
| 88 | // If we did a multiply-add variant, however, then we could | 87 | // If we did a multiply-add variant, however, then we could |
| 89 | 88 | ||
| 90 | test p, p | 89 | test p, p |
| 91 | jz end | 90 | jz bignum_mul_end |
| 92 | 91 | ||
| 93 | // Set initial 2-part sum to zero (we zero c inside the body) | 92 | // Set initial 2-part sum to zero (we zero c inside the body) |
| 94 | 93 | ||
| @@ -99,7 +98,7 @@ S2N_BN_SYMBOL(bignum_mul): | |||
| 99 | 98 | ||
| 100 | xor k, k | 99 | xor k, k |
| 101 | 100 | ||
| 102 | outerloop: | 101 | bignum_mul_outerloop: |
| 103 | 102 | ||
| 104 | // Zero our carry term first; we eventually want it and a zero is useful now | 103 | // Zero our carry term first; we eventually want it and a zero is useful now |
| 105 | // Set a = max 0 (k + 1 - n), i = min (k + 1) m | 104 | // Set a = max 0 (k + 1 - n), i = min (k + 1) m |
| @@ -125,11 +124,11 @@ outerloop: | |||
| 125 | mov d, k | 124 | mov d, k |
| 126 | sub d, i | 125 | sub d, i |
| 127 | sub i, a | 126 | sub i, a |
| 128 | jbe innerend | 127 | jbe bignum_mul_innerend |
| 129 | lea x,[rcx+8*a] | 128 | lea x,[rcx+8*a] |
| 130 | lea y,[r9+8*d-8] | 129 | lea y,[r9+8*d-8] |
| 131 | 130 | ||
| 132 | innerloop: | 131 | bignum_mul_innerloop: |
| 133 | mov rax, [y+8*i] | 132 | mov rax, [y+8*i] |
| 134 | mul QWORD PTR [x] | 133 | mul QWORD PTR [x] |
| 135 | add x, 8 | 134 | add x, 8 |
| @@ -137,9 +136,9 @@ innerloop: | |||
| 137 | adc h, rdx | 136 | adc h, rdx |
| 138 | adc c, 0 | 137 | adc c, 0 |
| 139 | dec i | 138 | dec i |
| 140 | jnz innerloop | 139 | jnz bignum_mul_innerloop |
| 141 | 140 | ||
| 142 | innerend: | 141 | bignum_mul_innerend: |
| 143 | 142 | ||
| 144 | mov [z], l | 143 | mov [z], l |
| 145 | mov l, h | 144 | mov l, h |
| @@ -147,9 +146,9 @@ innerend: | |||
| 147 | add z, 8 | 146 | add z, 8 |
| 148 | 147 | ||
| 149 | cmp k, p | 148 | cmp k, p |
| 150 | jc outerloop | 149 | jc bignum_mul_outerloop |
| 151 | 150 | ||
| 152 | end: | 151 | bignum_mul_end: |
| 153 | pop r15 | 152 | pop r15 |
| 154 | pop r14 | 153 | pop r14 |
| 155 | pop r13 | 154 | pop r13 |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S index 70ff69e372..5e04bcc009 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S | |||
| @@ -16,8 +16,8 @@ | |||
| 16 | // Multiply z := x * y | 16 | // Multiply z := x * y |
| 17 | // Inputs x[4], y[4]; output z[8] | 17 | // Inputs x[4], y[4]; output z[8] |
| 18 | // | 18 | // |
| 19 | // extern void bignum_mul_4_8_alt | 19 | // extern void bignum_mul_4_8_alt(uint64_t z[static 8], const uint64_t x[static 4], |
| 20 | // (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); | 20 | // const uint64_t y[static 4]); |
| 21 | // | 21 | // |
| 22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | 22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y |
| 23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | 23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y |
| @@ -72,7 +72,7 @@ | |||
| 72 | adc h, rdx | 72 | adc h, rdx |
| 73 | 73 | ||
| 74 | S2N_BN_SYMBOL(bignum_mul_4_8_alt): | 74 | S2N_BN_SYMBOL(bignum_mul_4_8_alt): |
| 75 | _CET_ENDBR | 75 | _CET_ENDBR |
| 76 | 76 | ||
| 77 | #if WINDOWS_ABI | 77 | #if WINDOWS_ABI |
| 78 | push rdi | 78 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S index 066403b074..4d54168c90 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S | |||
| @@ -16,8 +16,9 @@ | |||
| 16 | // Multiply z := x * y | 16 | // Multiply z := x * y |
| 17 | // Inputs x[8], y[8]; output z[16] | 17 | // Inputs x[8], y[8]; output z[16] |
| 18 | // | 18 | // |
| 19 | // extern void bignum_mul_8_16_alt | 19 | // extern void bignum_mul_8_16_alt(uint64_t z[static 16], |
| 20 | // (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); | 20 | // const uint64_t x[static 8], |
| 21 | // const uint64_t y[static 8]); | ||
| 21 | // | 22 | // |
| 22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | 23 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y |
| 23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | 24 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y |
| @@ -72,7 +73,7 @@ | |||
| 72 | adc h, rdx | 73 | adc h, rdx |
| 73 | 74 | ||
| 74 | S2N_BN_SYMBOL(bignum_mul_8_16_alt): | 75 | S2N_BN_SYMBOL(bignum_mul_8_16_alt): |
| 75 | _CET_ENDBR | 76 | _CET_ENDBR |
| 76 | 77 | ||
| 77 | #if WINDOWS_ABI | 78 | #if WINDOWS_ABI |
| 78 | push rdi | 79 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S index 54e3f59442..48cc182b72 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S | |||
| @@ -16,8 +16,7 @@ | |||
| 16 | // Square z := x^2 | 16 | // Square z := x^2 |
| 17 | // Input x[n]; output z[k] | 17 | // Input x[n]; output z[k] |
| 18 | // | 18 | // |
| 19 | // extern void bignum_sqr | 19 | // extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x); |
| 20 | // (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); | ||
| 21 | // | 20 | // |
| 22 | // Does the "z := x^2" operation where x is n digits and result z is k. | 21 | // Does the "z := x^2" operation where x is n digits and result z is k. |
| 23 | // Truncates the result in general unless k >= 2 * n | 22 | // Truncates the result in general unless k >= 2 * n |
| @@ -62,7 +61,7 @@ | |||
| 62 | #define llshort ebp | 61 | #define llshort ebp |
| 63 | 62 | ||
| 64 | S2N_BN_SYMBOL(bignum_sqr): | 63 | S2N_BN_SYMBOL(bignum_sqr): |
| 65 | _CET_ENDBR | 64 | _CET_ENDBR |
| 66 | 65 | ||
| 67 | #if WINDOWS_ABI | 66 | #if WINDOWS_ABI |
| 68 | push rdi | 67 | push rdi |
| @@ -86,7 +85,7 @@ S2N_BN_SYMBOL(bignum_sqr): | |||
| 86 | // If p = 0 the result is trivial and nothing needs doing | 85 | // If p = 0 the result is trivial and nothing needs doing |
| 87 | 86 | ||
| 88 | test p, p | 87 | test p, p |
| 89 | jz end | 88 | jz bignum_sqr_end |
| 90 | 89 | ||
| 91 | // initialize (hh,ll) = 0 | 90 | // initialize (hh,ll) = 0 |
| 92 | 91 | ||
| @@ -97,7 +96,7 @@ S2N_BN_SYMBOL(bignum_sqr): | |||
| 97 | 96 | ||
| 98 | xor k, k | 97 | xor k, k |
| 99 | 98 | ||
| 100 | outerloop: | 99 | bignum_sqr_outerloop: |
| 101 | 100 | ||
| 102 | // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n | 101 | // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n |
| 103 | // We want to accumulate all x[i] * x[k - i] for bot <= i < top | 102 | // We want to accumulate all x[i] * x[k - i] for bot <= i < top |
| @@ -122,7 +121,7 @@ outerloop: | |||
| 122 | // If htop <= bot then main doubled part of the sum is empty | 121 | // If htop <= bot then main doubled part of the sum is empty |
| 123 | 122 | ||
| 124 | cmp i, htop | 123 | cmp i, htop |
| 125 | jnc nosumming | 124 | jnc bignum_sqr_nosumming |
| 126 | 125 | ||
| 127 | // Use a moving pointer for [y] = x[k-i] for the cofactor | 126 | // Use a moving pointer for [y] = x[k-i] for the cofactor |
| 128 | 127 | ||
| @@ -132,7 +131,7 @@ outerloop: | |||
| 132 | 131 | ||
| 133 | // Do the main part of the sum x[i] * x[k - i] for 2 * i < k | 132 | // Do the main part of the sum x[i] * x[k - i] for 2 * i < k |
| 134 | 133 | ||
| 135 | innerloop: | 134 | bignum_sqr_innerloop: |
| 136 | mov a, [x+8*i] | 135 | mov a, [x+8*i] |
| 137 | mul QWORD PTR [y] | 136 | mul QWORD PTR [y] |
| 138 | add l, a | 137 | add l, a |
| @@ -141,7 +140,7 @@ innerloop: | |||
| 141 | sub y, 8 | 140 | sub y, 8 |
| 142 | inc i | 141 | inc i |
| 143 | cmp i, htop | 142 | cmp i, htop |
| 144 | jc innerloop | 143 | jc bignum_sqr_innerloop |
| 145 | 144 | ||
| 146 | // Now double it | 145 | // Now double it |
| 147 | 146 | ||
| @@ -151,11 +150,11 @@ innerloop: | |||
| 151 | 150 | ||
| 152 | // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term | 151 | // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term |
| 153 | 152 | ||
| 154 | nosumming: | 153 | bignum_sqr_nosumming: |
| 155 | test k, 1 | 154 | test k, 1 |
| 156 | jnz innerend | 155 | jnz bignum_sqr_innerend |
| 157 | cmp i, n | 156 | cmp i, n |
| 158 | jnc innerend | 157 | jnc bignum_sqr_innerend |
| 159 | 158 | ||
| 160 | mov a, [x+8*i] | 159 | mov a, [x+8*i] |
| 161 | mul a | 160 | mul a |
| @@ -165,7 +164,7 @@ nosumming: | |||
| 165 | 164 | ||
| 166 | // Now add the local sum into the global sum, store and shift | 165 | // Now add the local sum into the global sum, store and shift |
| 167 | 166 | ||
| 168 | innerend: | 167 | bignum_sqr_innerend: |
| 169 | add l, ll | 168 | add l, ll |
| 170 | mov [z+8*k], l | 169 | mov [z+8*k], l |
| 171 | adc h, hh | 170 | adc h, hh |
| @@ -175,11 +174,11 @@ innerend: | |||
| 175 | 174 | ||
| 176 | inc k | 175 | inc k |
| 177 | cmp k, p | 176 | cmp k, p |
| 178 | jc outerloop | 177 | jc bignum_sqr_outerloop |
| 179 | 178 | ||
| 180 | // Restore registers and return | 179 | // Restore registers and return |
| 181 | 180 | ||
| 182 | end: | 181 | bignum_sqr_end: |
| 183 | pop r15 | 182 | pop r15 |
| 184 | pop r14 | 183 | pop r14 |
| 185 | pop r13 | 184 | pop r13 |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S index 7c534ae907..cb0eec0eea 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S | |||
| @@ -16,8 +16,8 @@ | |||
| 16 | // Square, z := x^2 | 16 | // Square, z := x^2 |
| 17 | // Input x[4]; output z[8] | 17 | // Input x[4]; output z[8] |
| 18 | // | 18 | // |
| 19 | // extern void bignum_sqr_4_8_alt | 19 | // extern void bignum_sqr_4_8_alt(uint64_t z[static 8], |
| 20 | // (uint64_t z[static 8], uint64_t x[static 4]); | 20 | // const uint64_t x[static 4]); |
| 21 | // | 21 | // |
| 22 | // Standard x86-64 ABI: RDI = z, RSI = x | 22 | // Standard x86-64 ABI: RDI = z, RSI = x |
| 23 | // Microsoft x64 ABI: RCX = z, RDX = x | 23 | // Microsoft x64 ABI: RCX = z, RDX = x |
| @@ -71,7 +71,7 @@ | |||
| 71 | adc c, 0 | 71 | adc c, 0 |
| 72 | 72 | ||
| 73 | S2N_BN_SYMBOL(bignum_sqr_4_8_alt): | 73 | S2N_BN_SYMBOL(bignum_sqr_4_8_alt): |
| 74 | _CET_ENDBR | 74 | _CET_ENDBR |
| 75 | 75 | ||
| 76 | #if WINDOWS_ABI | 76 | #if WINDOWS_ABI |
| 77 | push rdi | 77 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S index ac0b6f96c2..04577d56cf 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S | |||
| @@ -16,7 +16,8 @@ | |||
| 16 | // Square, z := x^2 | 16 | // Square, z := x^2 |
| 17 | // Input x[8]; output z[16] | 17 | // Input x[8]; output z[16] |
| 18 | // | 18 | // |
| 19 | // extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); | 19 | // extern void bignum_sqr_8_16_alt(uint64_t z[static 16], |
| 20 | // const uint64_t x[static 8]); | ||
| 20 | // | 21 | // |
| 21 | // Standard x86-64 ABI: RDI = z, RSI = x | 22 | // Standard x86-64 ABI: RDI = z, RSI = x |
| 22 | // Microsoft x64 ABI: RCX = z, RDX = x | 23 | // Microsoft x64 ABI: RCX = z, RDX = x |
| @@ -103,7 +104,7 @@ | |||
| 103 | adc c, 0 | 104 | adc c, 0 |
| 104 | 105 | ||
| 105 | S2N_BN_SYMBOL(bignum_sqr_8_16_alt): | 106 | S2N_BN_SYMBOL(bignum_sqr_8_16_alt): |
| 106 | _CET_ENDBR | 107 | _CET_ENDBR |
| 107 | 108 | ||
| 108 | #if WINDOWS_ABI | 109 | #if WINDOWS_ABI |
| 109 | push rdi | 110 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S index 3ff8a30510..a18e86ba7c 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S | |||
| @@ -16,9 +16,8 @@ | |||
| 16 | // Subtract, z := x - y | 16 | // Subtract, z := x - y |
| 17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
| 18 | // | 18 | // |
| 19 | // extern uint64_t bignum_sub | 19 | // extern uint64_t bignum_sub(uint64_t p, uint64_t *z, uint64_t m, |
| 20 | // (uint64_t p, uint64_t *z, | 20 | // const uint64_t *x, uint64_t n, const uint64_t *y); |
| 21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
| 22 | // | 21 | // |
| 23 | // Does the z := x - y operation, truncating modulo p words in general and | 22 | // Does the z := x - y operation, truncating modulo p words in general and |
| 24 | // returning a top borrow (0 or 1) in the p'th place, only subtracting input | 23 | // returning a top borrow (0 or 1) in the p'th place, only subtracting input |
| @@ -49,7 +48,7 @@ | |||
| 49 | 48 | ||
| 50 | 49 | ||
| 51 | S2N_BN_SYMBOL(bignum_sub): | 50 | S2N_BN_SYMBOL(bignum_sub): |
| 52 | _CET_ENDBR | 51 | _CET_ENDBR |
| 53 | 52 | ||
| 54 | #if WINDOWS_ABI | 53 | #if WINDOWS_ABI |
| 55 | push rdi | 54 | push rdi |
| @@ -75,7 +74,7 @@ S2N_BN_SYMBOL(bignum_sub): | |||
| 75 | cmp p, n | 74 | cmp p, n |
| 76 | cmovc n, p | 75 | cmovc n, p |
| 77 | cmp m, n | 76 | cmp m, n |
| 78 | jc ylonger | 77 | jc bignum_sub_ylonger |
| 79 | 78 | ||
| 80 | // The case where x is longer or of the same size (p >= m >= n) | 79 | // The case where x is longer or of the same size (p >= m >= n) |
| 81 | 80 | ||
| @@ -83,32 +82,32 @@ S2N_BN_SYMBOL(bignum_sub): | |||
| 83 | sub m, n | 82 | sub m, n |
| 84 | inc m | 83 | inc m |
| 85 | test n, n | 84 | test n, n |
| 86 | jz xtest | 85 | jz bignum_sub_xtest |
| 87 | xmainloop: | 86 | bignum_sub_xmainloop: |
| 88 | mov a, [x+8*i] | 87 | mov a, [x+8*i] |
| 89 | sbb a, [y+8*i] | 88 | sbb a, [y+8*i] |
| 90 | mov [z+8*i],a | 89 | mov [z+8*i],a |
| 91 | inc i | 90 | inc i |
| 92 | dec n | 91 | dec n |
| 93 | jnz xmainloop | 92 | jnz bignum_sub_xmainloop |
| 94 | jmp xtest | 93 | jmp bignum_sub_xtest |
| 95 | xtoploop: | 94 | bignum_sub_xtoploop: |
| 96 | mov a, [x+8*i] | 95 | mov a, [x+8*i] |
| 97 | sbb a, 0 | 96 | sbb a, 0 |
| 98 | mov [z+8*i],a | 97 | mov [z+8*i],a |
| 99 | inc i | 98 | inc i |
| 100 | xtest: | 99 | bignum_sub_xtest: |
| 101 | dec m | 100 | dec m |
| 102 | jnz xtoploop | 101 | jnz bignum_sub_xtoploop |
| 103 | sbb a, a | 102 | sbb a, a |
| 104 | test p, p | 103 | test p, p |
| 105 | jz tailskip | 104 | jz bignum_sub_tailskip |
| 106 | tailloop: | 105 | bignum_sub_tailloop: |
| 107 | mov [z+8*i],a | 106 | mov [z+8*i],a |
| 108 | inc i | 107 | inc i |
| 109 | dec p | 108 | dec p |
| 110 | jnz tailloop | 109 | jnz bignum_sub_tailloop |
| 111 | tailskip: | 110 | bignum_sub_tailskip: |
| 112 | neg a | 111 | neg a |
| 113 | #if WINDOWS_ABI | 112 | #if WINDOWS_ABI |
| 114 | pop rsi | 113 | pop rsi |
| @@ -118,29 +117,29 @@ tailskip: | |||
| 118 | 117 | ||
| 119 | // The case where y is longer (p >= n > m) | 118 | // The case where y is longer (p >= n > m) |
| 120 | 119 | ||
| 121 | ylonger: | 120 | bignum_sub_ylonger: |
| 122 | 121 | ||
| 123 | sub p, n | 122 | sub p, n |
| 124 | sub n, m | 123 | sub n, m |
| 125 | test m, m | 124 | test m, m |
| 126 | jz ytoploop | 125 | jz bignum_sub_ytoploop |
| 127 | ymainloop: | 126 | bignum_sub_ymainloop: |
| 128 | mov a, [x+8*i] | 127 | mov a, [x+8*i] |
| 129 | sbb a, [y+8*i] | 128 | sbb a, [y+8*i] |
| 130 | mov [z+8*i],a | 129 | mov [z+8*i],a |
| 131 | inc i | 130 | inc i |
| 132 | dec m | 131 | dec m |
| 133 | jnz ymainloop | 132 | jnz bignum_sub_ymainloop |
| 134 | ytoploop: | 133 | bignum_sub_ytoploop: |
| 135 | mov ashort, 0 | 134 | mov ashort, 0 |
| 136 | sbb a, [y+8*i] | 135 | sbb a, [y+8*i] |
| 137 | mov [z+8*i],a | 136 | mov [z+8*i],a |
| 138 | inc i | 137 | inc i |
| 139 | dec n | 138 | dec n |
| 140 | jnz ytoploop | 139 | jnz bignum_sub_ytoploop |
| 141 | sbb a, a | 140 | sbb a, a |
| 142 | test p, p | 141 | test p, p |
| 143 | jnz tailloop | 142 | jnz bignum_sub_tailloop |
| 144 | neg a | 143 | neg a |
| 145 | #if WINDOWS_ABI | 144 | #if WINDOWS_ABI |
| 146 | pop rsi | 145 | pop rsi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S index 3926fcd4b0..84c9c8275d 100644 --- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S +++ b/src/lib/libcrypto/bn/arch/amd64/word_clz.S | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | // Count leading zero bits in a single word | 16 | // Count leading zero bits in a single word |
| 17 | // Input a; output function return | 17 | // Input a; output function return |
| 18 | // | 18 | // |
| 19 | // extern uint64_t word_clz (uint64_t a); | 19 | // extern uint64_t word_clz(uint64_t a); |
| 20 | // | 20 | // |
| 21 | // Standard x86-64 ABI: RDI = a, returns RAX | 21 | // Standard x86-64 ABI: RDI = a, returns RAX |
| 22 | // Microsoft x64 ABI: RCX = a, returns RAX | 22 | // Microsoft x64 ABI: RCX = a, returns RAX |
| @@ -30,7 +30,7 @@ | |||
| 30 | .text | 30 | .text |
| 31 | 31 | ||
| 32 | S2N_BN_SYMBOL(word_clz): | 32 | S2N_BN_SYMBOL(word_clz): |
| 33 | _CET_ENDBR | 33 | _CET_ENDBR |
| 34 | 34 | ||
| 35 | #if WINDOWS_ABI | 35 | #if WINDOWS_ABI |
| 36 | push rdi | 36 | push rdi |
