diff options
Diffstat (limited to 'src/lib/libcrypto/bn/asm/vms.mar')
-rw-r--r-- | src/lib/libcrypto/bn/asm/vms.mar | 254 |
1 files changed, 141 insertions, 113 deletions
diff --git a/src/lib/libcrypto/bn/asm/vms.mar b/src/lib/libcrypto/bn/asm/vms.mar index 465f2774b6..aefab15cdb 100644 --- a/src/lib/libcrypto/bn/asm/vms.mar +++ b/src/lib/libcrypto/bn/asm/vms.mar | |||
@@ -1,4 +1,4 @@ | |||
1 | .title vax_bn_mul_add_word unsigned multiply & add, 32*32+32+32=>64 | 1 | .title vax_bn_mul_add_words unsigned multiply & add, 32*32+32+32=>64 |
2 | ; | 2 | ; |
3 | ; w.j.m. 15-jan-1999 | 3 | ; w.j.m. 15-jan-1999 |
4 | ; | 4 | ; |
@@ -59,7 +59,7 @@ w=16 ;(AP) w by value (input) | |||
59 | movl r6,r0 ; return c | 59 | movl r6,r0 ; return c |
60 | ret | 60 | ret |
61 | 61 | ||
62 | .title vax_bn_mul_word unsigned multiply & add, 32*32+32=>64 | 62 | .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64 |
63 | ; | 63 | ; |
64 | ; w.j.m. 15-jan-1999 | 64 | ; w.j.m. 15-jan-1999 |
65 | ; | 65 | ; |
@@ -172,147 +172,175 @@ n=12 ;(AP) n by value (input) | |||
172 | ; } | 172 | ; } |
173 | ; | 173 | ; |
174 | ; Using EDIV would be very easy, if it didn't do signed calculations. | 174 | ; Using EDIV would be very easy, if it didn't do signed calculations. |
175 | ; Therefore, som extra things have to happen around it. The way to | 175 | ; Any time any of the input numbers are signed, there are problems, |
176 | ; handle that is to shift all operands right one step (basically dividing | 176 | ; usually with integer overflow, at which point it returns useless |
177 | ; them by 2) and handle the different cases depending on what the lowest | 177 | ; data (the quotient gets the value of l, and the remainder becomes 0). |
178 | ; bit of each operand was. | ||
179 | ; | 178 | ; |
180 | ; To start with, let's define the following: | 179 | ; If it was just for the dividend, it would be very easy, just divide |
180 | ; it by 2 (unsigned), do the division, multiply the resulting quotient | ||
181 | ; and remainder by 2, add the bit that was dropped when dividing by 2 | ||
182 | ; to the remainder, and do some adjustment so the remainder doesn't | ||
183 | ; end up larger than the divisor. For some cases when the divisor is | ||
184 | ; negative (from EDIV's point of view, i.e. when the highest bit is set), | ||
185 | ; dividing the dividend by 2 isn't enough, and since some operations | ||
186 | ; might generate integer overflows even when the dividend is divided by | ||
187 | ; 4 (when the high part of the shifted down dividend ends up being exactly | ||
188 | ; half of the divisor, the result is the quotient 0x80000000, which is | ||
189 | ; negative...) it needs to be divided by 8. Furthermore, the divisor needs | ||
190 | ; to be divided by 2 (unsigned) as well, to avoid more problems with the sign. | ||
191 | ; In this case, a little extra fiddling with the remainder is required. | ||
181 | ; | 192 | ; |
182 | ; a' = l & 1 | 193 | ; So, the simplest way to handle this is always to divide the dividend |
183 | ; a2 = <h,l> >> 1 # UNSIGNED shift! | 194 | ; by 8, and to divide the divisor by 2 if it's highest bit is set. |
184 | ; b' = d & 1 | 195 | ; After EDIV has been used, the quotient gets multiplied by 8 if the |
185 | ; b2 = d >> 1 # UNSIGNED shift! | 196 | ; original divisor was positive, otherwise 4. The remainder, oddly |
197 | ; enough, is *always* multiplied by 8. | ||
198 | ; NOTE: in the case mentioned above, where the high part of the shifted | ||
199 | ; down dividend ends up being exactly half the shifted down divisor, we | ||
200 | ; end up with a 33 bit quotient. That's no problem however, it usually | ||
201 | ; means we have ended up with a too large remainder as well, and the | ||
202 | ; problem is fixed by the last part of the algorithm (next paragraph). | ||
186 | ; | 203 | ; |
187 | ; Now, use EDIV to calculate a quotient and a remainder: | 204 | ; The routine ends with comparing the resulting remainder with the |
205 | ; original divisor and if the remainder is larger, subtract the | ||
206 | ; original divisor from it, and increase the quotient by 1. This is | ||
207 | ; done until the remainder is smaller than the divisor. | ||
188 | ; | 208 | ; |
189 | ; q'' = a2/b2 | 209 | ; The complete algorithm looks like this: |
190 | ; r'' = a2 - q''*b2 | ||
191 | ; | 210 | ; |
192 | ; If b' is 0, the quotient is already correct, we just need to adjust the | 211 | ; d' = d |
193 | ; remainder: | 212 | ; l' = l & 7 |
213 | ; [h,l] = [h,l] >> 3 | ||
214 | ; [q,r] = floor([h,l] / d) # This is the EDIV operation | ||
215 | ; if (q < 0) q = -q # I doubt this is necessary any more | ||
194 | ; | 216 | ; |
195 | ; if (b' == 0) | 217 | ; r' = r >> 29 |
196 | ; { | 218 | ; if (d' >= 0) |
197 | ; r = 2*r'' + a' | 219 | ; q' = q >> 29 |
198 | ; q = q'' | 220 | ; q = q << 3 |
199 | ; } | 221 | ; else |
200 | ; | 222 | ; q' = q >> 30 |
201 | ; If b' is 1, we need to do other adjustements. The first thought is the | 223 | ; q = q << 2 |
202 | ; following (note that r' will not always have the right value, but an | 224 | ; r = (r << 3) + l' |
203 | ; adjustement follows further down): | ||
204 | ; | ||
205 | ; if (b' == 1) | ||
206 | ; { | ||
207 | ; q' = q'' | ||
208 | ; r' = a - q'*b | ||
209 | ; | ||
210 | ; However, one can note the folowing relationship: | ||
211 | ; | ||
212 | ; r'' = a2 - q''*b2 | ||
213 | ; => 2*r'' = 2*a2 - 2*q''*b2 | ||
214 | ; = { a = 2*a2 + a', b = 2*b2 + b' = 2*b2 + 1, | ||
215 | ; q' = q'' } | ||
216 | ; = a - a' - q'*(b - 1) | ||
217 | ; = a - q'*b - a' + q' | ||
218 | ; = r' - a' + q' | ||
219 | ; => r' = 2*r'' - q' + a' | ||
220 | ; | 225 | ; |
221 | ; This enables us to use r'' instead of discarding and calculating another | 226 | ; if (d' < 0) |
222 | ; modulo: | ||
223 | ; | ||
224 | ; if (b' == 1) | ||
225 | ; { | 227 | ; { |
226 | ; q' = q'' | 228 | ; [r',r] = [r',r] - q |
227 | ; r' = (r'' << 1) - q' + a' | 229 | ; while ([r',r] < 0) |
228 | ; | ||
229 | ; Now, all we have to do is adjust r', because it might be < 0: | ||
230 | ; | ||
231 | ; while (r' < 0) | ||
232 | ; { | 230 | ; { |
233 | ; r' = r' + b | 231 | ; [r',r] = [r',r] + d |
234 | ; q' = q' - 1 | 232 | ; [q',q] = [q',q] - 1 |
235 | ; } | 233 | ; } |
236 | ; } | 234 | ; } |
237 | ; | 235 | ; |
238 | ; return q' | 236 | ; while ([r',r] >= d') |
237 | ; { | ||
238 | ; [r',r] = [r',r] - d' | ||
239 | ; [q',q] = [q',q] + 1 | ||
240 | ; } | ||
241 | ; | ||
242 | ; return q | ||
239 | 243 | ||
240 | h=4 ;(AP) h by value (input) | 244 | h=4 ;(AP) h by value (input) |
241 | l=8 ;(AP) l by value (input) | 245 | l=8 ;(AP) l by value (input) |
242 | d=12 ;(AP) d by value (input) | 246 | d=12 ;(AP) d by value (input) |
243 | 247 | ||
244 | ;aprim=r5 | 248 | ;r2 = l, q |
245 | ;a2=r6 | 249 | ;r3 = h, r |
246 | ;a20=r6 | 250 | ;r4 = d |
247 | ;a21=r7 | 251 | ;r5 = l' |
248 | ;bprim=r8 | 252 | ;r6 = r' |
249 | ;b2=r9 | 253 | ;r7 = d' |
250 | ;qprim=r10 ; initially used as q'' | 254 | ;r8 = q' |
251 | ;rprim=r11 ; initially used as r'' | ||
252 | |||
253 | 255 | ||
254 | .psect code,nowrt | 256 | .psect code,nowrt |
255 | 257 | ||
256 | .entry bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11> | 258 | .entry bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8> |
257 | movl l(ap),r2 | 259 | movl l(ap),r2 |
258 | movl h(ap),r3 | 260 | movl h(ap),r3 |
259 | movl d(ap),r4 | 261 | movl d(ap),r4 |
260 | 262 | ||
261 | movl #0,r5 | 263 | bicl3 #^XFFFFFFF8,r2,r5 ; l' = l & 7 |
262 | movl #0,r8 | 264 | bicl3 #^X00000007,r2,r2 |
263 | movl #0,r0 | ||
264 | ; movl #0,r1 | ||
265 | 265 | ||
266 | rotl #-1,r2,r6 ; a20 = l >> 1 (almost) | 266 | bicl3 #^XFFFFFFF8,r3,r6 |
267 | rotl #-1,r3,r7 ; a21 = h >> 1 (almost) | 267 | bicl3 #^X00000007,r3,r3 |
268 | rotl #-1,r4,r9 ; b2 = d >> 1 (almost) | 268 | |
269 | addl r6,r2 | ||
269 | 270 | ||
270 | tstl r6 | 271 | rotl #-3,r2,r2 ; l = l >> 3 |
271 | bgeq 1$ | 272 | rotl #-3,r3,r3 ; h = h >> 3 |
272 | xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 0 | 273 | |
273 | incl r5 ; a' = 1 | 274 | movl r4,r7 ; d' = d |
274 | 1$: | 275 | |
275 | tstl r7 | 276 | movl #0,r6 ; r' = 0 |
276 | bgeq 2$ | 277 | movl #0,r8 ; q' = 0 |
277 | xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 1, | 278 | |
278 | ; since that's what was lowest in a21 | 279 | tstl r4 |
279 | xorl2 #^X80000000,r7 ; fixup a21 so highest bit is 1 | ||
280 | 2$: | ||
281 | tstl r9 | ||
282 | beql 666$ ; Uh-oh, the divisor is 0... | 280 | beql 666$ ; Uh-oh, the divisor is 0... |
283 | bgtr 3$ | 281 | bgtr 1$ |
284 | xorl2 #^X80000000,r9 ; fixup b2 so highest bit is 0 | 282 | rotl #-1,r4,r4 ; If d is negative, shift it right. |
285 | incl r8 ; b' = 1 | 283 | bicl2 #^X80000000,r4 ; Since d is then a large number, the |
286 | 3$: | 284 | ; lowest bit is insignificant |
287 | tstl r9 | 285 | ; (contradict that, and I'll fix the problem!) |
288 | bneq 4$ ; if b2 is 0, we know that b' is 1 | 286 | 1$: |
289 | tstl r3 | 287 | ediv r4,r2,r2,r3 ; Do the actual division |
290 | bneq 666$ ; if higher half isn't 0, we overflow | 288 | |
291 | movl r2,r10 ; otherwise, we have our result | 289 | tstl r2 |
292 | brb 42$ ; This is a success, really. | 290 | bgeq 3$ |
293 | 4$: | 291 | mnegl r2,r2 ; if q < 0, negate it |
294 | ediv r9,r6,r10,r11 | 292 | 3$: |
295 | 293 | tstl r7 | |
296 | tstl r8 | 294 | blss 4$ |
297 | bneq 5$ ; If b' != 0, go to the other part | 295 | rotl #3,r2,r2 ; q = q << 3 |
298 | ; addl3 r11,r11,r1 | 296 | bicl3 #^XFFFFFFF8,r2,r8 ; q' gets the high bits from q |
299 | ; addl2 r5,r1 | 297 | bicl3 #^X00000007,r2,r2 |
300 | brb 42$ | 298 | bsb 41$ |
301 | 5$: | 299 | 4$: ; else |
302 | ashl #1,r11,r11 | 300 | rotl #2,r2,r2 ; q = q << 2 |
303 | subl2 r10,r11 | 301 | bicl3 #^XFFFFFFFC,r2,r8 ; q' gets the high bits from q |
304 | addl2 r5,r11 | 302 | bicl3 #^X00000003,r2,r2 |
305 | bgeq 7$ | 303 | 41$: |
306 | 6$: | 304 | rotl #3,r3,r3 ; r = r << 3 |
307 | decl r10 | 305 | bicl3 #^XFFFFFFF8,r3,r6 ; r' gets the high bits from r |
308 | addl2 r4,r11 | 306 | bicl3 #^X00000007,r3,r3 |
309 | blss 6$ | 307 | addl r5,r3 ; r = r + l' |
310 | 7$: | 308 | |
311 | ; movl r11,r1 | 309 | tstl r7 |
310 | bgeq 5$ | ||
311 | bitl #1,r7 | ||
312 | beql 5$ ; if d' < 0 && d' & 1 | ||
313 | subl r2,r3 ; [r',r] = [r',r] - [q',q] | ||
314 | sbwc r8,r6 | ||
315 | 45$: | ||
316 | bgeq 5$ ; while r < 0 | ||
317 | decl r2 ; [q',q] = [q',q] - 1 | ||
318 | sbwc #0,r8 | ||
319 | addl r7,r3 ; [r',r] = [r',r] + d' | ||
320 | adwc #0,r6 | ||
321 | brb 45$ | ||
322 | |||
323 | ; The return points are placed in the middle to keep a short distance from | ||
324 | ; all the branch points | ||
312 | 42$: | 325 | 42$: |
313 | movl r10,r0 | 326 | ; movl r3,r1 |
327 | movl r2,r0 | ||
328 | ret | ||
314 | 666$: | 329 | 666$: |
330 | movl #^XFFFFFFFF,r0 | ||
315 | ret | 331 | ret |
332 | |||
333 | 5$: | ||
334 | tstl r6 | ||
335 | bneq 6$ | ||
336 | cmpl r3,r7 | ||
337 | blssu 42$ ; while [r',r] >= d' | ||
338 | 6$: | ||
339 | subl r7,r3 ; [r',r] = [r',r] - d' | ||
340 | sbwc #0,r6 | ||
341 | incl r2 ; [q',q] = [q',q] + 1 | ||
342 | adwc #0,r8 | ||
343 | brb 5$ | ||
316 | 344 | ||
317 | .title vax_bn_add_words unsigned add of two arrays | 345 | .title vax_bn_add_words unsigned add of two arrays |
318 | ; | 346 | ; |