diff options
author | miod <> | 2014-04-11 22:51:54 +0000 |
---|---|---|
committer | miod <> | 2014-04-11 22:51:54 +0000 |
commit | e4e5cfce71aea090d747d436ab48c4717f78c97c (patch) | |
tree | afd4b98775fa2752df82417abdf319a3af0bb439 /src/lib/libcrypto/arch/vax/bn_asm_vax.S | |
parent | 3c70ae462fc747402d562cd98a2825922441cdda (diff) | |
download | openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.tar.gz openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.tar.bz2 openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.zip |
Move build machinery for libcrypto from libssl/crypto to libcrypto, as well
as configuration files; split manpages and .pc files between libcrypto and
libssl.
No functional change, only there to make engineering easier, and libcrypto
sources are still found in libssl/src/crypto at the moment.
ok reyk@, also discussed with deraadt@ beck@ and the usual crypto suspects.
Diffstat (limited to 'src/lib/libcrypto/arch/vax/bn_asm_vax.S')
-rw-r--r-- | src/lib/libcrypto/arch/vax/bn_asm_vax.S | 436 |
1 files changed, 436 insertions, 0 deletions
diff --git a/src/lib/libcrypto/arch/vax/bn_asm_vax.S b/src/lib/libcrypto/arch/vax/bn_asm_vax.S new file mode 100644 index 0000000000..2969ae9dac --- /dev/null +++ b/src/lib/libcrypto/arch/vax/bn_asm_vax.S | |||
@@ -0,0 +1,436 @@ | |||
1 | # $OpenBSD: bn_asm_vax.S,v 1.1 2014/04/11 22:51:53 miod Exp $ | ||
2 | # $NetBSD: bn_asm_vax.S,v 1.1 2003/11/03 10:22:28 ragge Exp $ | ||
3 | |||
4 | #include <machine/asm.h> | ||
5 | |||
6 | # w.j.m. 15-jan-1999 | ||
7 | # | ||
8 | # EMUL multiplies and adds signed values, so each pass patches the high word (r1) to get the unsigned result. | ||
9 | # | ||
10 | # ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) { | ||
11 | # ULONG c = 0; | ||
12 | # int i; | ||
13 | # for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ; | ||
14 | # return c; | ||
15 | # } | ||
16 | |||
17 | ENTRY(bn_mul_add_words,R6) | ||
18 | movl 4(%ap),%r2 # r2 = r (destination pointer) | ||
19 | movl 8(%ap),%r3 # r3 = a (source pointer) | ||
20 | movl 12(%ap),%r4 # r4 = n (loop counter) | ||
21 | movl 16(%ap),%r5 # r5 = w (multiplier) | ||
22 | clrl %r6 # r6 = carry between words, returned at the end | ||
23 | |||
24 | 0: emul %r5,(%r3),(%r2),%r0 # signed w * a[i] + r[i] -> r0 (low), r1 (high) | ||
25 | |||
26 | # fixup when r[i] has its top bit set (the addend was treated as negative) | ||
27 | tstl (%r2) | ||
28 | bgeq 1f | ||
29 | incl %r1 # add 1 back to the high word | ||
30 | |||
31 | 1: # add the carry saved from the previous word to the 64-bit result | ||
32 | addl2 %r6,%r0 | ||
33 | adwc $0,%r1 | ||
34 | |||
35 | # combined fixup for w and a[i] with the top bit set (seen as negative by EMUL) | ||
36 | tstl %r5 # if w has its top bit set... | ||
37 | bgeq 1f | ||
38 | addl2 (%r3),%r1 # ...add a[i] to the high word | ||
39 | 1: tstl (%r3) # if a[i] has its top bit set... | ||
40 | bgeq 1f | ||
41 | addl2 %r5,%r1 # ...add w to the high word | ||
42 | 1: | ||
43 | movl %r0,(%r2)+ # store the low word in r[i] and advance r | ||
44 | addl2 $4,%r3 # advance a to the next word | ||
45 | movl %r1,%r6 # high word becomes the carry for the next word | ||
46 | |||
47 | sobgtr %r4,0b # --n > 0: next word (bottom-tested: one pass runs even if n <= 0) | ||
48 | |||
49 | movl %r6,%r0 | ||
50 | ret | ||
51 | |||
52 | # .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64 | ||
53 | #; | ||
54 | #; w.j.m. 15-jan-1999 | ||
55 | #; | ||
56 | #; EMUL works on signed values; the loop patches the high word (r1) so <c,r[i]> is the unsigned result. | ||
57 | #; | ||
58 | #; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) { | ||
59 | #; ULONG c = 0; | ||
60 | #; int i; | ||
61 | #; for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ; | ||
62 | #; return(c); | ||
63 | #; } | ||
64 | # NOTE(review): the loop is bottom-tested (sobgtr), so one pass runs even when n <= 0. | ||
65 | |||
66 | ENTRY(bn_mul_words,R6) | ||
67 | movl 4(%ap),%r2 # r2 = r (destination pointer) | ||
68 | movl 8(%ap),%r3 # r3 = a (source pointer) | ||
69 | movl 12(%ap),%r4 # r4 = n (loop counter) | ||
70 | movl 16(%ap),%r5 # r5 = w (multiplier) | ||
71 | clrl %r6 # r6 = carry between words, returned at the end | ||
72 | |||
73 | 0: emul %r5,(%r3),%r6,%r0 # signed w * a[i] + carry -> r0 (low), r1 (high) | ||
74 | |||
75 | # fixup when the carry has its top bit set (the addend was treated as negative) | ||
76 | tstl %r6 | ||
77 | bgeq 1f | ||
78 | incl %r1 | ||
79 | |||
80 | 1: # combined fixup for w and a[i] with the top bit set: add the other operand to the high word | ||
81 | tstl %r5 | ||
82 | bgeq 1f | ||
83 | addl2 (%r3),%r1 | ||
84 | 1: tstl (%r3) | ||
85 | bgeq 1f | ||
86 | addl2 %r5,%r1 | ||
87 | |||
88 | 1: movl %r0,(%r2)+ | ||
89 | addl2 $4,%r3 | ||
90 | movl %r1,%r6 | ||
91 | |||
92 | sobgtr %r4,0b | ||
93 | |||
94 | movl %r6,%r0 | ||
95 | ret | ||
96 | |||
97 | |||
98 | |||
99 | # .title vax_bn_sqr_words unsigned square, 32*32=>64 | ||
100 | #; | ||
101 | #; w.j.m. 15-jan-1999 | ||
102 | #; | ||
103 | #; EMUL works on signed values; when a[i] has its top bit set the high word is patched by adding a[i] twice. | ||
104 | #; | ||
105 | #; void bn_sqr_words(ULONG r[],ULONG a[],int n) { | ||
106 | #; int i; | ||
107 | #; for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ; | ||
108 | #; } | ||
109 | # NOTE(review): the loop is bottom-tested (sobgtr), so one pass runs even when n <= 0. | ||
110 | |||
111 | ENTRY(bn_sqr_words,0) | ||
112 | movl 4(%ap),%r2 # r2 = r (destination, two words per input word) | ||
113 | movl 8(%ap),%r3 # r3 = a (source pointer) | ||
114 | movl 12(%ap),%r4 # r4 = n (loop counter) | ||
115 | |||
116 | 0: movl (%r3)+,%r5 # r5 = a[i], advance a | ||
117 | |||
118 | emul %r5,%r5,$0,%r0 # signed a[i] * a[i] + 0 -> r0 (low), r1 (high) | ||
119 | |||
120 | # fixup when a[i] has its top bit set: both operands were seen as negative | ||
121 | tstl %r5 | ||
122 | bgeq 1f | ||
123 | addl2 %r5,%r1 | ||
124 | addl2 %r5,%r1 | ||
125 | |||
126 | 1: movq %r0,(%r2)+ # store the 64-bit result (r0, r1) and advance r by 8 bytes | ||
127 | |||
128 | sobgtr %r4,0b # --n > 0: next word | ||
129 | |||
130 | ret | ||
131 | |||
132 | |||
133 | # .title vax_bn_div_words unsigned divide | ||
134 | #; | ||
135 | #; Richard Levitte 20-Nov-2000 | ||
136 | #; | ||
137 | #; ULONG bn_div_words(ULONG h, ULONG l, ULONG d) | ||
138 | #; { | ||
139 | #; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d); | ||
140 | #; } | ||
141 | #; | ||
142 | #; Using EDIV would be very easy, if it didn't do signed calculations. | ||
143 | #; Any time any of the input numbers are signed, there are problems, | ||
144 | #; usually with integer overflow, at which point it returns useless | ||
145 | #; data (the quotient gets the value of l, and the remainder becomes 0). | ||
146 | #; | ||
147 | #; If it was just for the dividend, it would be very easy, just divide | ||
148 | #; it by 2 (unsigned), do the division, multiply the resulting quotient | ||
149 | #; and remainder by 2, add the bit that was dropped when dividing by 2 | ||
150 | #; to the remainder, and do some adjustment so the remainder doesn't | ||
151 | #; end up larger than the divisor. For some cases when the divisor is | ||
152 | #; negative (from EDIV's point of view, i.e. when the highest bit is set), | ||
153 | #; dividing the dividend by 2 isn't enough, and since some operations | ||
154 | #; might generate integer overflows even when the dividend is divided by | ||
155 | #; 4 (when the high part of the shifted down dividend ends up being exactly | ||
156 | #; half of the divisor, the result is the quotient 0x80000000, which is | ||
157 | #; negative...) it needs to be divided by 8. Furthermore, the divisor needs | ||
158 | #; to be divided by 2 (unsigned) as well, to avoid more problems with the sign. | ||
159 | #; In this case, a little extra fiddling with the remainder is required. | ||
160 | #; | ||
161 | #; So, the simplest way to handle this is always to divide the dividend | ||
162 | #; by 8, and to divide the divisor by 2 if its highest bit is set. | ||
163 | #; After EDIV has been used, the quotient gets multiplied by 8 if the | ||
164 | #; original divisor was positive, otherwise 4. The remainder, oddly | ||
165 | #; enough, is *always* multiplied by 8. | ||
166 | #; NOTE: in the case mentioned above, where the high part of the shifted | ||
167 | #; down dividend ends up being exactly half the shifted down divisor, we | ||
168 | #; end up with a 33 bit quotient. That's no problem however, it usually | ||
169 | #; means we have ended up with a too large remainder as well, and the | ||
170 | #; problem is fixed by the last part of the algorithm (next paragraph). | ||
171 | #; | ||
172 | #; The routine ends with comparing the resulting remainder with the | ||
173 | #; original divisor and if the remainder is larger, subtract the | ||
174 | #; original divisor from it, and increase the quotient by 1. This is | ||
175 | #; done until the remainder is smaller than the divisor. | ||
176 | #; | ||
177 | #; The complete algorithm looks like this: | ||
178 | #; | ||
179 | #; d' = d | ||
180 | #; l' = l & 7 | ||
181 | #; [h,l] = [h,l] >> 3 | ||
182 | #; [q,r] = floor([h,l] / d) # This is the EDIV operation | ||
183 | #; if (q < 0) q = -q # I doubt this is necessary any more | ||
184 | #; | ||
185 | #; r' = r >> 29 | ||
186 | #; if (d' >= 0) | ||
187 | #; q' = q >> 29 | ||
188 | #; q = q << 3 | ||
189 | #; else | ||
190 | #; q' = q >> 30 | ||
191 | #; q = q << 2 | ||
192 | #; r = (r << 3) + l' | ||
193 | #; | ||
194 | #; if (d' < 0 && (d' & 1)) | ||
195 | #; { | ||
196 | #; [r',r] = [r',r] - [q',q] | ||
197 | #; while ([r',r] < 0) | ||
198 | #; { | ||
199 | #; [r',r] = [r',r] + d | ||
200 | #; [q',q] = [q',q] - 1 | ||
201 | #; } | ||
202 | #; } | ||
203 | #; | ||
204 | #; while ([r',r] >= d') | ||
205 | #; { | ||
206 | #; [r',r] = [r',r] - d' | ||
207 | #; [q',q] = [q',q] + 1 | ||
208 | #; } | ||
209 | #; | ||
210 | #; return q | ||
211 | # | ||
212 | #;r2 = l, q | ||
213 | #;r3 = h, r | ||
214 | #;r4 = d | ||
215 | #;r5 = l' | ||
216 | #;r6 = r' | ||
217 | #;r7 = d' | ||
218 | #;r8 = q' | ||
219 | # Result: quotient in r0, or -1 when d is 0 (see the two return points in the middle of the routine). | ||
220 | |||
221 | ENTRY(bn_div_words,R6|R7|R8) | ||
222 | movl 4(%ap),%r3 # r3 = h (high word of the dividend) | ||
223 | movl 8(%ap),%r2 # r2 = l (low word of the dividend) | ||
224 | movl 12(%ap),%r4 # r4 = d (divisor) | ||
225 | |||
226 | bicl3 $-8,%r2,%r5 # l' = l & 7 (bits dropped by the shift, added back to r later) | ||
227 | bicl3 $7,%r2,%r2 | ||
228 | |||
229 | bicl3 $-8,%r3,%r6 | ||
230 | bicl3 $7,%r3,%r3 | ||
231 | |||
232 | addl2 %r6,%r2 | ||
233 | |||
234 | rotl $-3,%r2,%r2 # l = l >> 3, with the low 3 bits of h rotated into the top | ||
235 | rotl $-3,%r3,%r3 # h = h >> 3 (its low 3 bits were cleared above) | ||
236 | |||
237 | movl %r4,%r7 # d' = d (original divisor, kept for the fixups) | ||
238 | |||
239 | clrl %r6 # r' = 0 (r6 was scratch for the low bits of h until here) | ||
240 | clrl %r8 # q' = 0 | ||
241 | |||
242 | tstl %r4 | ||
243 | beql 0f # d == 0: bail out and return -1 | ||
244 | bgtr 1f | ||
245 | rotl $-1,%r4,%r4 # If d is negative, shift it right. | ||
246 | bicl2 $0x80000000,%r4 # Since d is then a large number, the | ||
247 | # lowest bit is insignificant | ||
248 | # (contradict that, and I'll fix the problem!) | ||
249 | 1: | ||
250 | ediv %r4,%r2,%r2,%r3 # signed divide of the quad in r2 (low), r3 (high) by r4 -> q in r2, r in r3 | ||
251 | |||
252 | tstl %r2 | ||
253 | bgeq 1f | ||
254 | mnegl %r2,%r2 # if q < 0, negate it | ||
255 | 1: | ||
256 | tstl %r7 | ||
257 | blss 1f | ||
258 | rotl $3,%r2,%r2 # d' >= 0: q = q << 3 | ||
259 | bicl3 $-8,%r2,%r8 # q' gets the 3 bits rotated out of the top of q | ||
260 | bicl3 $7,%r2,%r2 | ||
261 | brb 2f | ||
262 | |||
263 | 1: # else (d' < 0, so the divisor was halved above) | ||
264 | rotl $2,%r2,%r2 # q = q << 2 | ||
265 | bicl3 $-4,%r2,%r8 # q' gets the 2 bits rotated out of the top of q | ||
266 | bicl3 $3,%r2,%r2 | ||
267 | 2: | ||
268 | rotl $3,%r3,%r3 # r = r << 3 | ||
269 | bicl3 $-8,%r3,%r6 # r' gets the 3 bits rotated out of the top of r | ||
270 | bicl3 $7,%r3,%r3 | ||
271 | addl2 %r5,%r3 # r = r + l' | ||
272 | |||
273 | tstl %r7 | ||
274 | bgeq 5f | ||
275 | bitl $1,%r7 | ||
276 | beql 5f # if d' < 0 && d' & 1 | ||
277 | subl2 %r2,%r3 # [r',r] = [r',r] - [q',q] | ||
278 | sbwc %r8,%r6 | ||
279 | 3: | ||
280 | bgeq 5f # while [r',r] < 0 (sign comes from the last sbwc/adwc on r') | ||
281 | decl %r2 # [q',q] = [q',q] - 1 | ||
282 | sbwc $0,%r8 | ||
283 | addl2 %r7,%r3 # [r',r] = [r',r] + d' | ||
284 | adwc $0,%r6 | ||
285 | brb 3b | ||
286 | |||
287 | # The return points are placed in the middle to keep a short distance from | ||
288 | # all the branch points | ||
289 | 1: | ||
290 | # movl %r3,%r1 | ||
291 | movl %r2,%r0 | ||
292 | ret | ||
293 | 0: | ||
294 | movl $-1,%r0 | ||
295 | ret | ||
296 | 5: | ||
297 | tstl %r6 | ||
298 | bneq 6f | ||
299 | cmpl %r3,%r7 | ||
300 | blssu 1b # [r',r] < d' (unsigned): done, return q; otherwise keep looping | ||
301 | 6: | ||
302 | subl2 %r7,%r3 # [r',r] = [r',r] - d' | ||
303 | sbwc $0,%r6 | ||
304 | incl %r2 # [q',q] = [q',q] + 1 | ||
305 | adwc $0,%r8 | ||
306 | brb 5b | ||
307 | |||
308 | |||
309 | |||
310 | # .title vax_bn_add_words unsigned add of two arrays | ||
311 | #; | ||
312 | #; Richard Levitte 20-Nov-2000 | ||
313 | #; | ||
314 | #; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) { | ||
315 | #; ULONG c = 0; | ||
316 | #; int i; | ||
317 | #; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c; | ||
318 | #; return(c); | ||
319 | #; } | ||
320 | # The carry is kept in the condition-code C bit between words; n <= 0 returns 0 and leaves r[] alone. | ||
321 | |||
322 | ENTRY(bn_add_words,0) | ||
323 | movl 4(%ap),%r2 # r2 = r (destination pointer) | ||
324 | movl 8(%ap),%r3 # r3 = a (first source pointer) | ||
325 | movl 12(%ap),%r4 # r4 = b (second source pointer) | ||
326 | movl 16(%ap),%r5 # r5 = n (loop counter) | ||
327 | clrl %r0 | ||
328 | |||
329 | tstl %r5 | ||
330 | bleq 1f | ||
331 | |||
332 | 0: movl (%r3)+,%r1 # r1 = a[i], advance a; carry untouched | ||
333 | adwc (%r4)+,%r1 # r1 += b[i] + carry, advance b; carry used and touched | ||
334 | movl %r1,(%r2)+ # r[i] = r1, advance r; carry untouched | ||
335 | sobgtr %r5,0b # --n > 0: next word; carry untouched | ||
336 | |||
337 | adwc $0,%r0 | ||
338 | 1: ret | ||
339 | |||
340 | #; | ||
341 | #; Richard Levitte 20-Nov-2000 | ||
342 | #; | ||
343 | #; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) { | ||
344 | #; ULONG c = 0; | ||
345 | #; int i; | ||
346 | #; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c; | ||
347 | #; return(c); | ||
348 | #; } | ||
349 | # The borrow is kept in the condition-code C bit between words and folded into r0 at label 1 (also reached when n <= 0). | ||
350 | |||
351 | ENTRY(bn_sub_words,R6) | ||
352 | movl 4(%ap),%r2 # r2 = r (destination pointer) | ||
353 | movl 8(%ap),%r3 # r3 = a (minuend pointer) | ||
354 | movl 12(%ap),%r4 # r4 = b (subtrahend pointer) | ||
355 | movl 16(%ap),%r5 # r5 = n (loop counter) | ||
356 | clrl %r0 | ||
357 | |||
358 | tstl %r5 | ||
359 | bleq 1f | ||
360 | |||
361 | 0: movl (%r3)+,%r6 # r6 = a[i], advance a; carry untouched | ||
362 | sbwc (%r4)+,%r6 # r6 -= b[i] + borrow, advance b; carry used and touched | ||
363 | movl %r6,(%r2)+ # r[i] = r6, advance r; carry untouched | ||
364 | sobgtr %r5,0b # --n > 0: next word; carry untouched | ||
365 | |||
366 | 1: adwc $0,%r0 | ||
367 | ret | ||
368 | |||
369 | # | ||
370 | # Ragge 20-Sep-2003 | ||
371 | # | ||
372 | # Multiply a vector of 4/8 longwords by another. | ||
373 | # Uses two loops and 16/64 emuls. | ||
374 | # All four entry points load the size into r9 and share the loops from label 5; the sqr entries use a[] for both operands. | ||
375 | |||
376 | ENTRY(bn_mul_comba4,R6|R7|R8|R9) | ||
377 | movl $4,%r9 # size 4: 4*4 words | ||
378 | brb 6f | ||
379 | |||
380 | ENTRY(bn_mul_comba8,R6|R7|R8|R9) | ||
381 | movl $8,%r9 # size 8: 8*8 words | ||
382 | |||
383 | 6: movl 8(%ap),%r3 # r3 = a[] (walked by the inner loop) | ||
384 | movl 12(%ap),%r7 # r7 = b[] (one word consumed per outer pass) | ||
385 | brb 5f | ||
386 | |||
387 | ENTRY(bn_sqr_comba4,R6|R7|R8|R9) | ||
388 | movl $4,%r9 # size 4: 4*4 words | ||
389 | brb 0f | ||
390 | |||
391 | ENTRY(bn_sqr_comba8,R6|R7|R8|R9) | ||
392 | movl $8,%r9 # size 8: 8*8 words | ||
393 | |||
394 | 0: | ||
395 | movl 8(%ap),%r3 # r3 = a[] (walked by the inner loop) | ||
396 | movl %r3,%r7 # r7 = a[] again, used as the second operand | ||
397 | |||
398 | 5: movl 4(%ap),%r5 # r5 = r[] (destination) | ||
399 | movl %r9,%r8 | ||
400 | |||
401 | clrq (%r5) # clear the destination words that the first pass adds into | ||
402 | clrq 8(%r5) | ||
403 | clrq 16(%r5) # these only needed for comba8 | ||
404 | clrq 24(%r5) | ||
405 | |||
406 | 2: clrl %r4 # r4 = carry, restarted for every outer pass | ||
407 | movl %r9,%r6 # r6 = inner loop count | ||
408 | movl (%r7)+,%r2 # r2 = next multiplier word, advance r7 | ||
409 | |||
410 | 1: emul %r2,(%r3),%r4,%r0 | ||
411 | tstl %r4 | ||
412 | bgeq 3f | ||
413 | incl %r1 | ||
414 | 3: tstl %r2 | ||
415 | bgeq 3f | ||
416 | addl2 (%r3),%r1 | ||
417 | 3: tstl (%r3) | ||
418 | bgeq 3f | ||
419 | addl2 %r2,%r1 | ||
420 | |||
421 | 3: addl2 %r0,(%r5)+ # add the low word into the destination, advance r5 | ||
422 | adwc $0,%r1 # fold the carry of that add into the high word | ||
423 | movl %r1,%r4 # high word is the addend of the next emul | ||
424 | addl2 $4,%r3 | ||
425 | sobgtr %r6,1b | ||
426 | |||
427 | movl %r4,(%r5) # store the final high word one past the words just added | ||
428 | |||
429 | ashl $2,%r9,%r4 | ||
430 | subl2 %r4,%r3 | ||
431 | subl2 $4,%r4 | ||
432 | subl2 %r4,%r5 | ||
433 | |||
434 | sobgtr %r8,2b | ||
435 | |||
436 | ret | ||