diff options
author | jsing <> | 2025-08-11 14:11:20 +0000 |
---|---|---|
committer | jsing <> | 2025-08-11 14:11:20 +0000 |
commit | 28d52ec2924676a240d0477f564160bd054d5549 (patch) | |
tree | 22e2b3da2203c9ba206a85da5dd6bf7a67c0a1e8 /src | |
parent | 4fd25eed849a1e1cc5f0497cf86579813203b406 (diff) | |
download | openbsd-28d52ec2924676a240d0477f564160bd054d5549.tar.gz openbsd-28d52ec2924676a240d0477f564160bd054d5549.tar.bz2 openbsd-28d52ec2924676a240d0477f564160bd054d5549.zip |
Resync s2n-bignum primitives for amd64 with upstream.
This amounts to whitespace changes and label renaming.
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_add.S | 49 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S | 30 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S | 26 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_mul.S | 23 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S | 6 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S | 7 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S | 27 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S | 6 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S | 5 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bignum_sub.S | 45 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/word_clz.S | 4 |
11 files changed, 113 insertions, 115 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S index 5fe4aae7a1..5ec0e36282 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_add.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_add.S | |||
@@ -16,9 +16,8 @@ | |||
16 | // Add, z := x + y | 16 | // Add, z := x + y |
17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
18 | // | 18 | // |
19 | // extern uint64_t bignum_add | 19 | // extern uint64_t bignum_add(uint64_t p, uint64_t *z, uint64_t m, |
20 | // (uint64_t p, uint64_t *z, | 20 | // const uint64_t *x, uint64_t n, const uint64_t *y); |
21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
22 | // | 21 | // |
23 | // Does the z := x + y operation, truncating modulo p words in general and | 22 | // Does the z := x + y operation, truncating modulo p words in general and |
24 | // returning a top carry (0 or 1) in the p'th place, only adding the input | 23 | // returning a top carry (0 or 1) in the p'th place, only adding the input |
@@ -49,7 +48,7 @@ | |||
49 | 48 | ||
50 | 49 | ||
51 | S2N_BN_SYMBOL(bignum_add): | 50 | S2N_BN_SYMBOL(bignum_add): |
52 | _CET_ENDBR | 51 | _CET_ENDBR |
53 | 52 | ||
54 | #if WINDOWS_ABI | 53 | #if WINDOWS_ABI |
55 | push rdi | 54 | push rdi |
@@ -75,7 +74,7 @@ S2N_BN_SYMBOL(bignum_add): | |||
75 | cmp p, n | 74 | cmp p, n |
76 | cmovc n, p | 75 | cmovc n, p |
77 | cmp m, n | 76 | cmp m, n |
78 | jc ylonger | 77 | jc bignum_add_ylonger |
79 | 78 | ||
80 | // The case where x is longer or of the same size (p >= m >= n) | 79 | // The case where x is longer or of the same size (p >= m >= n) |
81 | 80 | ||
@@ -83,27 +82,27 @@ S2N_BN_SYMBOL(bignum_add): | |||
83 | sub m, n | 82 | sub m, n |
84 | inc m | 83 | inc m |
85 | test n, n | 84 | test n, n |
86 | jz xtest | 85 | jz bignum_add_xtest |
87 | xmainloop: | 86 | bignum_add_xmainloop: |
88 | mov a, [x+8*i] | 87 | mov a, [x+8*i] |
89 | adc a, [y+8*i] | 88 | adc a, [y+8*i] |
90 | mov [z+8*i],a | 89 | mov [z+8*i],a |
91 | inc i | 90 | inc i |
92 | dec n | 91 | dec n |
93 | jnz xmainloop | 92 | jnz bignum_add_xmainloop |
94 | jmp xtest | 93 | jmp bignum_add_xtest |
95 | xtoploop: | 94 | bignum_add_xtoploop: |
96 | mov a, [x+8*i] | 95 | mov a, [x+8*i] |
97 | adc a, 0 | 96 | adc a, 0 |
98 | mov [z+8*i],a | 97 | mov [z+8*i],a |
99 | inc i | 98 | inc i |
100 | xtest: | 99 | bignum_add_xtest: |
101 | dec m | 100 | dec m |
102 | jnz xtoploop | 101 | jnz bignum_add_xtoploop |
103 | mov ashort, 0 | 102 | mov ashort, 0 |
104 | adc a, 0 | 103 | adc a, 0 |
105 | test p, p | 104 | test p, p |
106 | jnz tails | 105 | jnz bignum_add_tails |
107 | #if WINDOWS_ABI | 106 | #if WINDOWS_ABI |
108 | pop rsi | 107 | pop rsi |
109 | pop rdi | 108 | pop rdi |
@@ -112,30 +111,30 @@ xtest: | |||
112 | 111 | ||
113 | // The case where y is longer (p >= n > m) | 112 | // The case where y is longer (p >= n > m) |
114 | 113 | ||
115 | ylonger: | 114 | bignum_add_ylonger: |
116 | 115 | ||
117 | sub p, n | 116 | sub p, n |
118 | sub n, m | 117 | sub n, m |
119 | test m, m | 118 | test m, m |
120 | jz ytoploop | 119 | jz bignum_add_ytoploop |
121 | ymainloop: | 120 | bignum_add_ymainloop: |
122 | mov a, [x+8*i] | 121 | mov a, [x+8*i] |
123 | adc a, [y+8*i] | 122 | adc a, [y+8*i] |
124 | mov [z+8*i],a | 123 | mov [z+8*i],a |
125 | inc i | 124 | inc i |
126 | dec m | 125 | dec m |
127 | jnz ymainloop | 126 | jnz bignum_add_ymainloop |
128 | ytoploop: | 127 | bignum_add_ytoploop: |
129 | mov a, [y+8*i] | 128 | mov a, [y+8*i] |
130 | adc a, 0 | 129 | adc a, 0 |
131 | mov [z+8*i],a | 130 | mov [z+8*i],a |
132 | inc i | 131 | inc i |
133 | dec n | 132 | dec n |
134 | jnz ytoploop | 133 | jnz bignum_add_ytoploop |
135 | mov ashort, 0 | 134 | mov ashort, 0 |
136 | adc a, 0 | 135 | adc a, 0 |
137 | test p, p | 136 | test p, p |
138 | jnz tails | 137 | jnz bignum_add_tails |
139 | #if WINDOWS_ABI | 138 | #if WINDOWS_ABI |
140 | pop rsi | 139 | pop rsi |
141 | pop rdi | 140 | pop rdi |
@@ -144,16 +143,16 @@ ytoploop: | |||
144 | 143 | ||
145 | // Adding a non-trivial tail, when p > max(m,n) | 144 | // Adding a non-trivial tail, when p > max(m,n) |
146 | 145 | ||
147 | tails: | 146 | bignum_add_tails: |
148 | mov [z+8*i],a | 147 | mov [z+8*i],a |
149 | xor a, a | 148 | xor a, a |
150 | jmp tail | 149 | jmp bignum_add_tail |
151 | tailloop: | 150 | bignum_add_tailloop: |
152 | mov [z+8*i],a | 151 | mov [z+8*i],a |
153 | tail: | 152 | bignum_add_tail: |
154 | inc i | 153 | inc i |
155 | dec p | 154 | dec p |
156 | jnz tailloop | 155 | jnz bignum_add_tailloop |
157 | #if WINDOWS_ABI | 156 | #if WINDOWS_ABI |
158 | pop rsi | 157 | pop rsi |
159 | pop rdi | 158 | pop rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S index 25ba17bce2..ebbacec344 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmadd.S | |||
@@ -16,8 +16,8 @@ | |||
16 | // Multiply-add with single-word multiplier, z := z + c * y | 16 | // Multiply-add with single-word multiplier, z := z + c * y |
17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
18 | // | 18 | // |
19 | // extern uint64_t bignum_cmadd | 19 | // extern uint64_t bignum_cmadd(uint64_t k, uint64_t *z, uint64_t c, uint64_t n, |
20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 20 | // const uint64_t *y); |
21 | // | 21 | // |
22 | // Does the "z := z + c * y" operation where y is n digits, result z is p. | 22 | // Does the "z := z + c * y" operation where y is n digits, result z is p. |
23 | // Truncates the result in general. | 23 | // Truncates the result in general. |
@@ -54,7 +54,7 @@ | |||
54 | 54 | ||
55 | 55 | ||
56 | S2N_BN_SYMBOL(bignum_cmadd): | 56 | S2N_BN_SYMBOL(bignum_cmadd): |
57 | _CET_ENDBR | 57 | _CET_ENDBR |
58 | 58 | ||
59 | #if WINDOWS_ABI | 59 | #if WINDOWS_ABI |
60 | push rdi | 60 | push rdi |
@@ -82,7 +82,7 @@ S2N_BN_SYMBOL(bignum_cmadd): | |||
82 | 82 | ||
83 | xor h, h | 83 | xor h, h |
84 | test n, n | 84 | test n, n |
85 | jz end | 85 | jz bignum_cmadd_end |
86 | 86 | ||
87 | // Move c into a safer register as multiplies overwrite rdx | 87 | // Move c into a safer register as multiplies overwrite rdx |
88 | 88 | ||
@@ -96,11 +96,11 @@ S2N_BN_SYMBOL(bignum_cmadd): | |||
96 | mov h, rdx | 96 | mov h, rdx |
97 | mov ishort, 1 | 97 | mov ishort, 1 |
98 | dec n | 98 | dec n |
99 | jz hightail | 99 | jz bignum_cmadd_hightail |
100 | 100 | ||
101 | // Main loop, where we always have CF + previous high part h to add in | 101 | // Main loop, where we always have CF + previous high part h to add in |
102 | 102 | ||
103 | loop: | 103 | bignum_cmadd_loop: |
104 | adc h, [z+8*i] | 104 | adc h, [z+8*i] |
105 | sbb r, r | 105 | sbb r, r |
106 | mov rax, [x+8*i] | 106 | mov rax, [x+8*i] |
@@ -111,36 +111,36 @@ loop: | |||
111 | mov h, rdx | 111 | mov h, rdx |
112 | inc i | 112 | inc i |
113 | dec n | 113 | dec n |
114 | jnz loop | 114 | jnz bignum_cmadd_loop |
115 | 115 | ||
116 | hightail: | 116 | bignum_cmadd_hightail: |
117 | adc h, 0 | 117 | adc h, 0 |
118 | 118 | ||
119 | // Propagate the carry all the way to the end with h as extra carry word | 119 | // Propagate the carry all the way to the end with h as extra carry word |
120 | 120 | ||
121 | tail: | 121 | bignum_cmadd_tail: |
122 | test p, p | 122 | test p, p |
123 | jz end | 123 | jz bignum_cmadd_end |
124 | 124 | ||
125 | add [z+8*i], h | 125 | add [z+8*i], h |
126 | mov hshort, 0 | 126 | mov hshort, 0 |
127 | inc i | 127 | inc i |
128 | dec p | 128 | dec p |
129 | jz highend | 129 | jz bignum_cmadd_highend |
130 | 130 | ||
131 | tloop: | 131 | bignum_cmadd_tloop: |
132 | adc [z+8*i], h | 132 | adc [z+8*i], h |
133 | inc i | 133 | inc i |
134 | dec p | 134 | dec p |
135 | jnz tloop | 135 | jnz bignum_cmadd_tloop |
136 | 136 | ||
137 | highend: | 137 | bignum_cmadd_highend: |
138 | 138 | ||
139 | adc h, 0 | 139 | adc h, 0 |
140 | 140 | ||
141 | // Return the high/carry word | 141 | // Return the high/carry word |
142 | 142 | ||
143 | end: | 143 | bignum_cmadd_end: |
144 | mov rax, h | 144 | mov rax, h |
145 | 145 | ||
146 | pop rbx | 146 | pop rbx |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S index 12f785d63a..3e28e37535 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S | |||
@@ -16,8 +16,8 @@ | |||
16 | // Multiply by a single word, z := c * y | 16 | // Multiply by a single word, z := c * y |
17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] | 17 | // Inputs c, y[n]; outputs function return (carry-out) and z[k] |
18 | // | 18 | // |
19 | // extern uint64_t bignum_cmul | 19 | // extern uint64_t bignum_cmul(uint64_t k, uint64_t *z, uint64_t c, uint64_t n, |
20 | // (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y); | 20 | // const uint64_t *y); |
21 | // | 21 | // |
22 | // Does the "z := c * y" operation where y is n digits, result z is p. | 22 | // Does the "z := c * y" operation where y is n digits, result z is p. |
23 | // Truncates the result in general unless p >= n + 1. | 23 | // Truncates the result in general unless p >= n + 1. |
@@ -51,7 +51,7 @@ | |||
51 | 51 | ||
52 | 52 | ||
53 | S2N_BN_SYMBOL(bignum_cmul): | 53 | S2N_BN_SYMBOL(bignum_cmul): |
54 | _CET_ENDBR | 54 | _CET_ENDBR |
55 | 55 | ||
56 | #if WINDOWS_ABI | 56 | #if WINDOWS_ABI |
57 | push rdi | 57 | push rdi |
@@ -76,7 +76,7 @@ S2N_BN_SYMBOL(bignum_cmul): | |||
76 | xor h, h | 76 | xor h, h |
77 | xor i, i | 77 | xor i, i |
78 | test n, n | 78 | test n, n |
79 | jz tail | 79 | jz bignum_cmul_tail |
80 | 80 | ||
81 | // Move c into a safer register as multiplies overwrite rdx | 81 | // Move c into a safer register as multiplies overwrite rdx |
82 | 82 | ||
@@ -90,11 +90,11 @@ S2N_BN_SYMBOL(bignum_cmul): | |||
90 | mov h, rdx | 90 | mov h, rdx |
91 | inc i | 91 | inc i |
92 | cmp i, n | 92 | cmp i, n |
93 | jz tail | 93 | jz bignum_cmul_tail |
94 | 94 | ||
95 | // Main loop doing the multiplications | 95 | // Main loop doing the multiplications |
96 | 96 | ||
97 | loop: | 97 | bignum_cmul_loop: |
98 | mov rax, [x+8*i] | 98 | mov rax, [x+8*i] |
99 | mul c | 99 | mul c |
100 | add rax, h | 100 | add rax, h |
@@ -103,28 +103,28 @@ loop: | |||
103 | mov h, rdx | 103 | mov h, rdx |
104 | inc i | 104 | inc i |
105 | cmp i, n | 105 | cmp i, n |
106 | jc loop | 106 | jc bignum_cmul_loop |
107 | 107 | ||
108 | // Add a tail when the destination is longer | 108 | // Add a tail when the destination is longer |
109 | 109 | ||
110 | tail: | 110 | bignum_cmul_tail: |
111 | cmp i, p | 111 | cmp i, p |
112 | jnc end | 112 | jnc bignum_cmul_end |
113 | mov [z+8*i], h | 113 | mov [z+8*i], h |
114 | xor h, h | 114 | xor h, h |
115 | inc i | 115 | inc i |
116 | cmp i, p | 116 | cmp i, p |
117 | jnc end | 117 | jnc bignum_cmul_end |
118 | 118 | ||
119 | tloop: | 119 | bignum_cmul_tloop: |
120 | mov [z+8*i], h | 120 | mov [z+8*i], h |
121 | inc i | 121 | inc i |
122 | cmp i, p | 122 | cmp i, p |
123 | jc tloop | 123 | jc bignum_cmul_tloop |
124 | 124 | ||
125 | // Return the high/carry word | 125 | // Return the high/carry word |
126 | 126 | ||
127 | end: | 127 | bignum_cmul_end: |
128 | mov rax, h | 128 | mov rax, h |
129 | 129 | ||
130 | #if WINDOWS_ABI | 130 | #if WINDOWS_ABI |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S index a3552679a2..3bc09de30a 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul.S | |||
@@ -16,9 +16,8 @@ | |||
16 | // Multiply z := x * y | 16 | // Multiply z := x * y |
17 | // Inputs x[m], y[n]; output z[k] | 17 | // Inputs x[m], y[n]; output z[k] |
18 | // | 18 | // |
19 | // extern void bignum_mul | 19 | // extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, |
20 | // (uint64_t k, uint64_t *z, | 20 | // uint64_t n, const uint64_t *y); |
21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
22 | // | 21 | // |
23 | // Does the "z := x * y" operation where x is m digits, y is n, result z is k. | 22 | // Does the "z := x * y" operation where x is m digits, y is n, result z is k. |
24 | // Truncates the result in general unless k >= m + n | 23 | // Truncates the result in general unless k >= m + n |
@@ -59,7 +58,7 @@ | |||
59 | 58 | ||
60 | 59 | ||
61 | S2N_BN_SYMBOL(bignum_mul): | 60 | S2N_BN_SYMBOL(bignum_mul): |
62 | _CET_ENDBR | 61 | _CET_ENDBR |
63 | 62 | ||
64 | #if WINDOWS_ABI | 63 | #if WINDOWS_ABI |
65 | push rdi | 64 | push rdi |
@@ -88,7 +87,7 @@ S2N_BN_SYMBOL(bignum_mul): | |||
88 | // If we did a multiply-add variant, however, then we could | 87 | // If we did a multiply-add variant, however, then we could |
89 | 88 | ||
90 | test p, p | 89 | test p, p |
91 | jz end | 90 | jz bignum_mul_end |
92 | 91 | ||
93 | // Set initial 2-part sum to zero (we zero c inside the body) | 92 | // Set initial 2-part sum to zero (we zero c inside the body) |
94 | 93 | ||
@@ -99,7 +98,7 @@ S2N_BN_SYMBOL(bignum_mul): | |||
99 | 98 | ||
100 | xor k, k | 99 | xor k, k |
101 | 100 | ||
102 | outerloop: | 101 | bignum_mul_outerloop: |
103 | 102 | ||
104 | // Zero our carry term first; we eventually want it and a zero is useful now | 103 | // Zero our carry term first; we eventually want it and a zero is useful now |
105 | // Set a = max 0 (k + 1 - n), i = min (k + 1) m | 104 | // Set a = max 0 (k + 1 - n), i = min (k + 1) m |
@@ -125,11 +124,11 @@ outerloop: | |||
125 | mov d, k | 124 | mov d, k |
126 | sub d, i | 125 | sub d, i |
127 | sub i, a | 126 | sub i, a |
128 | jbe innerend | 127 | jbe bignum_mul_innerend |
129 | lea x,[rcx+8*a] | 128 | lea x,[rcx+8*a] |
130 | lea y,[r9+8*d-8] | 129 | lea y,[r9+8*d-8] |
131 | 130 | ||
132 | innerloop: | 131 | bignum_mul_innerloop: |
133 | mov rax, [y+8*i] | 132 | mov rax, [y+8*i] |
134 | mul QWORD PTR [x] | 133 | mul QWORD PTR [x] |
135 | add x, 8 | 134 | add x, 8 |
@@ -137,9 +136,9 @@ innerloop: | |||
137 | adc h, rdx | 136 | adc h, rdx |
138 | adc c, 0 | 137 | adc c, 0 |
139 | dec i | 138 | dec i |
140 | jnz innerloop | 139 | jnz bignum_mul_innerloop |
141 | 140 | ||
142 | innerend: | 141 | bignum_mul_innerend: |
143 | 142 | ||
144 | mov [z], l | 143 | mov [z], l |
145 | mov l, h | 144 | mov l, h |
@@ -147,9 +146,9 @@ innerend: | |||
147 | add z, 8 | 146 | add z, 8 |
148 | 147 | ||
149 | cmp k, p | 148 | cmp k, p |
150 | jc outerloop | 149 | jc bignum_mul_outerloop |
151 | 150 | ||
152 | end: | 151 | bignum_mul_end: |
153 | pop r15 | 152 | pop r15 |
154 | pop r14 | 153 | pop r14 |
155 | pop r13 | 154 | pop r13 |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S index 70ff69e372..5e04bcc009 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_4_8_alt.S | |||
@@ -16,8 +16,8 @@ | |||
16 | // Multiply z := x * y | 16 | // Multiply z := x * y |
17 | // Inputs x[4], y[4]; output z[8] | 17 | // Inputs x[4], y[4]; output z[8] |
18 | // | 18 | // |
19 | // extern void bignum_mul_4_8_alt | 19 | // extern void bignum_mul_4_8_alt(uint64_t z[static 8], const uint64_t x[static 4], |
20 | // (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]); | 20 | // const uint64_t y[static 4]); |
21 | // | 21 | // |
22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | 22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y |
23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | 23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y |
@@ -72,7 +72,7 @@ | |||
72 | adc h, rdx | 72 | adc h, rdx |
73 | 73 | ||
74 | S2N_BN_SYMBOL(bignum_mul_4_8_alt): | 74 | S2N_BN_SYMBOL(bignum_mul_4_8_alt): |
75 | _CET_ENDBR | 75 | _CET_ENDBR |
76 | 76 | ||
77 | #if WINDOWS_ABI | 77 | #if WINDOWS_ABI |
78 | push rdi | 78 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S index 066403b074..4d54168c90 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_mul_8_16_alt.S | |||
@@ -16,8 +16,9 @@ | |||
16 | // Multiply z := x * y | 16 | // Multiply z := x * y |
17 | // Inputs x[8], y[8]; output z[16] | 17 | // Inputs x[8], y[8]; output z[16] |
18 | // | 18 | // |
19 | // extern void bignum_mul_8_16_alt | 19 | // extern void bignum_mul_8_16_alt(uint64_t z[static 16], |
20 | // (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]); | 20 | // const uint64_t x[static 8], |
21 | // const uint64_t y[static 8]); | ||
21 | // | 22 | // |
22 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y | 23 | // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y |
23 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y | 24 | // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y |
@@ -72,7 +73,7 @@ | |||
72 | adc h, rdx | 73 | adc h, rdx |
73 | 74 | ||
74 | S2N_BN_SYMBOL(bignum_mul_8_16_alt): | 75 | S2N_BN_SYMBOL(bignum_mul_8_16_alt): |
75 | _CET_ENDBR | 76 | _CET_ENDBR |
76 | 77 | ||
77 | #if WINDOWS_ABI | 78 | #if WINDOWS_ABI |
78 | push rdi | 79 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S index 54e3f59442..48cc182b72 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr.S | |||
@@ -16,8 +16,7 @@ | |||
16 | // Square z := x^2 | 16 | // Square z := x^2 |
17 | // Input x[n]; output z[k] | 17 | // Input x[n]; output z[k] |
18 | // | 18 | // |
19 | // extern void bignum_sqr | 19 | // extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t n, const uint64_t *x); |
20 | // (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x); | ||
21 | // | 20 | // |
22 | // Does the "z := x^2" operation where x is n digits and result z is k. | 21 | // Does the "z := x^2" operation where x is n digits and result z is k. |
23 | // Truncates the result in general unless k >= 2 * n | 22 | // Truncates the result in general unless k >= 2 * n |
@@ -62,7 +61,7 @@ | |||
62 | #define llshort ebp | 61 | #define llshort ebp |
63 | 62 | ||
64 | S2N_BN_SYMBOL(bignum_sqr): | 63 | S2N_BN_SYMBOL(bignum_sqr): |
65 | _CET_ENDBR | 64 | _CET_ENDBR |
66 | 65 | ||
67 | #if WINDOWS_ABI | 66 | #if WINDOWS_ABI |
68 | push rdi | 67 | push rdi |
@@ -86,7 +85,7 @@ S2N_BN_SYMBOL(bignum_sqr): | |||
86 | // If p = 0 the result is trivial and nothing needs doing | 85 | // If p = 0 the result is trivial and nothing needs doing |
87 | 86 | ||
88 | test p, p | 87 | test p, p |
89 | jz end | 88 | jz bignum_sqr_end |
90 | 89 | ||
91 | // initialize (hh,ll) = 0 | 90 | // initialize (hh,ll) = 0 |
92 | 91 | ||
@@ -97,7 +96,7 @@ S2N_BN_SYMBOL(bignum_sqr): | |||
97 | 96 | ||
98 | xor k, k | 97 | xor k, k |
99 | 98 | ||
100 | outerloop: | 99 | bignum_sqr_outerloop: |
101 | 100 | ||
102 | // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n | 101 | // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n |
103 | // We want to accumulate all x[i] * x[k - i] for bot <= i < top | 102 | // We want to accumulate all x[i] * x[k - i] for bot <= i < top |
@@ -122,7 +121,7 @@ outerloop: | |||
122 | // If htop <= bot then main doubled part of the sum is empty | 121 | // If htop <= bot then main doubled part of the sum is empty |
123 | 122 | ||
124 | cmp i, htop | 123 | cmp i, htop |
125 | jnc nosumming | 124 | jnc bignum_sqr_nosumming |
126 | 125 | ||
127 | // Use a moving pointer for [y] = x[k-i] for the cofactor | 126 | // Use a moving pointer for [y] = x[k-i] for the cofactor |
128 | 127 | ||
@@ -132,7 +131,7 @@ outerloop: | |||
132 | 131 | ||
133 | // Do the main part of the sum x[i] * x[k - i] for 2 * i < k | 132 | // Do the main part of the sum x[i] * x[k - i] for 2 * i < k |
134 | 133 | ||
135 | innerloop: | 134 | bignum_sqr_innerloop: |
136 | mov a, [x+8*i] | 135 | mov a, [x+8*i] |
137 | mul QWORD PTR [y] | 136 | mul QWORD PTR [y] |
138 | add l, a | 137 | add l, a |
@@ -141,7 +140,7 @@ innerloop: | |||
141 | sub y, 8 | 140 | sub y, 8 |
142 | inc i | 141 | inc i |
143 | cmp i, htop | 142 | cmp i, htop |
144 | jc innerloop | 143 | jc bignum_sqr_innerloop |
145 | 144 | ||
146 | // Now double it | 145 | // Now double it |
147 | 146 | ||
@@ -151,11 +150,11 @@ innerloop: | |||
151 | 150 | ||
152 | // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term | 151 | // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term |
153 | 152 | ||
154 | nosumming: | 153 | bignum_sqr_nosumming: |
155 | test k, 1 | 154 | test k, 1 |
156 | jnz innerend | 155 | jnz bignum_sqr_innerend |
157 | cmp i, n | 156 | cmp i, n |
158 | jnc innerend | 157 | jnc bignum_sqr_innerend |
159 | 158 | ||
160 | mov a, [x+8*i] | 159 | mov a, [x+8*i] |
161 | mul a | 160 | mul a |
@@ -165,7 +164,7 @@ nosumming: | |||
165 | 164 | ||
166 | // Now add the local sum into the global sum, store and shift | 165 | // Now add the local sum into the global sum, store and shift |
167 | 166 | ||
168 | innerend: | 167 | bignum_sqr_innerend: |
169 | add l, ll | 168 | add l, ll |
170 | mov [z+8*k], l | 169 | mov [z+8*k], l |
171 | adc h, hh | 170 | adc h, hh |
@@ -175,11 +174,11 @@ innerend: | |||
175 | 174 | ||
176 | inc k | 175 | inc k |
177 | cmp k, p | 176 | cmp k, p |
178 | jc outerloop | 177 | jc bignum_sqr_outerloop |
179 | 178 | ||
180 | // Restore registers and return | 179 | // Restore registers and return |
181 | 180 | ||
182 | end: | 181 | bignum_sqr_end: |
183 | pop r15 | 182 | pop r15 |
184 | pop r14 | 183 | pop r14 |
185 | pop r13 | 184 | pop r13 |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S index 7c534ae907..cb0eec0eea 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_4_8_alt.S | |||
@@ -16,8 +16,8 @@ | |||
16 | // Square, z := x^2 | 16 | // Square, z := x^2 |
17 | // Input x[4]; output z[8] | 17 | // Input x[4]; output z[8] |
18 | // | 18 | // |
19 | // extern void bignum_sqr_4_8_alt | 19 | // extern void bignum_sqr_4_8_alt(uint64_t z[static 8], |
20 | // (uint64_t z[static 8], uint64_t x[static 4]); | 20 | // const uint64_t x[static 4]); |
21 | // | 21 | // |
22 | // Standard x86-64 ABI: RDI = z, RSI = x | 22 | // Standard x86-64 ABI: RDI = z, RSI = x |
23 | // Microsoft x64 ABI: RCX = z, RDX = x | 23 | // Microsoft x64 ABI: RCX = z, RDX = x |
@@ -71,7 +71,7 @@ | |||
71 | adc c, 0 | 71 | adc c, 0 |
72 | 72 | ||
73 | S2N_BN_SYMBOL(bignum_sqr_4_8_alt): | 73 | S2N_BN_SYMBOL(bignum_sqr_4_8_alt): |
74 | _CET_ENDBR | 74 | _CET_ENDBR |
75 | 75 | ||
76 | #if WINDOWS_ABI | 76 | #if WINDOWS_ABI |
77 | push rdi | 77 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S index ac0b6f96c2..04577d56cf 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sqr_8_16_alt.S | |||
@@ -16,7 +16,8 @@ | |||
16 | // Square, z := x^2 | 16 | // Square, z := x^2 |
17 | // Input x[8]; output z[16] | 17 | // Input x[8]; output z[16] |
18 | // | 18 | // |
19 | // extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]); | 19 | // extern void bignum_sqr_8_16_alt(uint64_t z[static 16], |
20 | // const uint64_t x[static 8]); | ||
20 | // | 21 | // |
21 | // Standard x86-64 ABI: RDI = z, RSI = x | 22 | // Standard x86-64 ABI: RDI = z, RSI = x |
22 | // Microsoft x64 ABI: RCX = z, RDX = x | 23 | // Microsoft x64 ABI: RCX = z, RDX = x |
@@ -103,7 +104,7 @@ | |||
103 | adc c, 0 | 104 | adc c, 0 |
104 | 105 | ||
105 | S2N_BN_SYMBOL(bignum_sqr_8_16_alt): | 106 | S2N_BN_SYMBOL(bignum_sqr_8_16_alt): |
106 | _CET_ENDBR | 107 | _CET_ENDBR |
107 | 108 | ||
108 | #if WINDOWS_ABI | 109 | #if WINDOWS_ABI |
109 | push rdi | 110 | push rdi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S index 3ff8a30510..a18e86ba7c 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S +++ b/src/lib/libcrypto/bn/arch/amd64/bignum_sub.S | |||
@@ -16,9 +16,8 @@ | |||
16 | // Subtract, z := x - y | 16 | // Subtract, z := x - y |
17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] | 17 | // Inputs x[m], y[n]; outputs function return (carry-out) and z[p] |
18 | // | 18 | // |
19 | // extern uint64_t bignum_sub | 19 | // extern uint64_t bignum_sub(uint64_t p, uint64_t *z, uint64_t m, |
20 | // (uint64_t p, uint64_t *z, | 20 | // const uint64_t *x, uint64_t n, const uint64_t *y); |
21 | // uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); | ||
22 | // | 21 | // |
23 | // Does the z := x - y operation, truncating modulo p words in general and | 22 | // Does the z := x - y operation, truncating modulo p words in general and |
24 | // returning a top borrow (0 or 1) in the p'th place, only subtracting input | 23 | // returning a top borrow (0 or 1) in the p'th place, only subtracting input |
@@ -49,7 +48,7 @@ | |||
49 | 48 | ||
50 | 49 | ||
51 | S2N_BN_SYMBOL(bignum_sub): | 50 | S2N_BN_SYMBOL(bignum_sub): |
52 | _CET_ENDBR | 51 | _CET_ENDBR |
53 | 52 | ||
54 | #if WINDOWS_ABI | 53 | #if WINDOWS_ABI |
55 | push rdi | 54 | push rdi |
@@ -75,7 +74,7 @@ S2N_BN_SYMBOL(bignum_sub): | |||
75 | cmp p, n | 74 | cmp p, n |
76 | cmovc n, p | 75 | cmovc n, p |
77 | cmp m, n | 76 | cmp m, n |
78 | jc ylonger | 77 | jc bignum_sub_ylonger |
79 | 78 | ||
80 | // The case where x is longer or of the same size (p >= m >= n) | 79 | // The case where x is longer or of the same size (p >= m >= n) |
81 | 80 | ||
@@ -83,32 +82,32 @@ S2N_BN_SYMBOL(bignum_sub): | |||
83 | sub m, n | 82 | sub m, n |
84 | inc m | 83 | inc m |
85 | test n, n | 84 | test n, n |
86 | jz xtest | 85 | jz bignum_sub_xtest |
87 | xmainloop: | 86 | bignum_sub_xmainloop: |
88 | mov a, [x+8*i] | 87 | mov a, [x+8*i] |
89 | sbb a, [y+8*i] | 88 | sbb a, [y+8*i] |
90 | mov [z+8*i],a | 89 | mov [z+8*i],a |
91 | inc i | 90 | inc i |
92 | dec n | 91 | dec n |
93 | jnz xmainloop | 92 | jnz bignum_sub_xmainloop |
94 | jmp xtest | 93 | jmp bignum_sub_xtest |
95 | xtoploop: | 94 | bignum_sub_xtoploop: |
96 | mov a, [x+8*i] | 95 | mov a, [x+8*i] |
97 | sbb a, 0 | 96 | sbb a, 0 |
98 | mov [z+8*i],a | 97 | mov [z+8*i],a |
99 | inc i | 98 | inc i |
100 | xtest: | 99 | bignum_sub_xtest: |
101 | dec m | 100 | dec m |
102 | jnz xtoploop | 101 | jnz bignum_sub_xtoploop |
103 | sbb a, a | 102 | sbb a, a |
104 | test p, p | 103 | test p, p |
105 | jz tailskip | 104 | jz bignum_sub_tailskip |
106 | tailloop: | 105 | bignum_sub_tailloop: |
107 | mov [z+8*i],a | 106 | mov [z+8*i],a |
108 | inc i | 107 | inc i |
109 | dec p | 108 | dec p |
110 | jnz tailloop | 109 | jnz bignum_sub_tailloop |
111 | tailskip: | 110 | bignum_sub_tailskip: |
112 | neg a | 111 | neg a |
113 | #if WINDOWS_ABI | 112 | #if WINDOWS_ABI |
114 | pop rsi | 113 | pop rsi |
@@ -118,29 +117,29 @@ tailskip: | |||
118 | 117 | ||
119 | // The case where y is longer (p >= n > m) | 118 | // The case where y is longer (p >= n > m) |
120 | 119 | ||
121 | ylonger: | 120 | bignum_sub_ylonger: |
122 | 121 | ||
123 | sub p, n | 122 | sub p, n |
124 | sub n, m | 123 | sub n, m |
125 | test m, m | 124 | test m, m |
126 | jz ytoploop | 125 | jz bignum_sub_ytoploop |
127 | ymainloop: | 126 | bignum_sub_ymainloop: |
128 | mov a, [x+8*i] | 127 | mov a, [x+8*i] |
129 | sbb a, [y+8*i] | 128 | sbb a, [y+8*i] |
130 | mov [z+8*i],a | 129 | mov [z+8*i],a |
131 | inc i | 130 | inc i |
132 | dec m | 131 | dec m |
133 | jnz ymainloop | 132 | jnz bignum_sub_ymainloop |
134 | ytoploop: | 133 | bignum_sub_ytoploop: |
135 | mov ashort, 0 | 134 | mov ashort, 0 |
136 | sbb a, [y+8*i] | 135 | sbb a, [y+8*i] |
137 | mov [z+8*i],a | 136 | mov [z+8*i],a |
138 | inc i | 137 | inc i |
139 | dec n | 138 | dec n |
140 | jnz ytoploop | 139 | jnz bignum_sub_ytoploop |
141 | sbb a, a | 140 | sbb a, a |
142 | test p, p | 141 | test p, p |
143 | jnz tailloop | 142 | jnz bignum_sub_tailloop |
144 | neg a | 143 | neg a |
145 | #if WINDOWS_ABI | 144 | #if WINDOWS_ABI |
146 | pop rsi | 145 | pop rsi |
diff --git a/src/lib/libcrypto/bn/arch/amd64/word_clz.S b/src/lib/libcrypto/bn/arch/amd64/word_clz.S index 3926fcd4b0..84c9c8275d 100644 --- a/src/lib/libcrypto/bn/arch/amd64/word_clz.S +++ b/src/lib/libcrypto/bn/arch/amd64/word_clz.S | |||
@@ -16,7 +16,7 @@ | |||
16 | // Count leading zero bits in a single word | 16 | // Count leading zero bits in a single word |
17 | // Input a; output function return | 17 | // Input a; output function return |
18 | // | 18 | // |
19 | // extern uint64_t word_clz (uint64_t a); | 19 | // extern uint64_t word_clz(uint64_t a); |
20 | // | 20 | // |
21 | // Standard x86-64 ABI: RDI = a, returns RAX | 21 | // Standard x86-64 ABI: RDI = a, returns RAX |
22 | // Microsoft x64 ABI: RCX = a, returns RAX | 22 | // Microsoft x64 ABI: RCX = a, returns RAX |
@@ -30,7 +30,7 @@ | |||
30 | .text | 30 | .text |
31 | 31 | ||
32 | S2N_BN_SYMBOL(word_clz): | 32 | S2N_BN_SYMBOL(word_clz): |
33 | _CET_ENDBR | 33 | _CET_ENDBR |
34 | 34 | ||
35 | #if WINDOWS_ABI | 35 | #if WINDOWS_ABI |
36 | push rdi | 36 | push rdi |