summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/arch/vax/bn_asm_vax.S
diff options
context:
space:
mode:
authormiod <>2014-04-11 22:51:54 +0000
committermiod <>2014-04-11 22:51:54 +0000
commite4e5cfce71aea090d747d436ab48c4717f78c97c (patch)
treeafd4b98775fa2752df82417abdf319a3af0bb439 /src/lib/libcrypto/arch/vax/bn_asm_vax.S
parent3c70ae462fc747402d562cd98a2825922441cdda (diff)
downloadopenbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.tar.gz
openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.tar.bz2
openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.zip
Move build machinery for libcrypto from libssl/crypto to libcrypto, as well
as configuration files; split manpages and .pc files between libcrypto and libssl. No functional change, only there to make engineering easier, and libcrypto sources are still found in libssl/src/crypto at the moment. ok reyk@, also discussed with deraadt@ beck@ and the usual crypto suspects.
Diffstat (limited to 'src/lib/libcrypto/arch/vax/bn_asm_vax.S')
-rw-r--r--src/lib/libcrypto/arch/vax/bn_asm_vax.S436
1 files changed, 436 insertions, 0 deletions
diff --git a/src/lib/libcrypto/arch/vax/bn_asm_vax.S b/src/lib/libcrypto/arch/vax/bn_asm_vax.S
new file mode 100644
index 0000000000..2969ae9dac
--- /dev/null
+++ b/src/lib/libcrypto/arch/vax/bn_asm_vax.S
@@ -0,0 +1,436 @@
1# $OpenBSD: bn_asm_vax.S,v 1.1 2014/04/11 22:51:53 miod Exp $
2# $NetBSD: bn_asm_vax.S,v 1.1 2003/11/03 10:22:28 ragge Exp $
3
4#include <machine/asm.h>
5
6# w.j.m. 15-jan-1999
7#
8# it's magic ...
9#
10# ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
11# ULONG c = 0;
12# int i;
13# for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
14# return c;
15# }
16
# bn_mul_add_words(r, a, n, w)
#
# Computes r[i] = low32(r[i] + a[i] * w + carry) for i = 0 .. n-1,
# feeding the high 32 bits of each sum into the next step as carry.
# The final carry is returned in r0.  A count of zero or less returns 0
# and leaves r[] untouched.
#
# emul is a signed 32x32+32 -> 64 multiply-add, so every step patches the
# high word (r1) to obtain the unsigned result.
#
# Registers: r2 = r cursor, r3 = a cursor, r4 = words left, r5 = w,
#            r6 = carry, r0/r1 = low/high word of the current sum.

ENTRY(bn_mul_add_words,R6)
	movl	4(%ap),%r2		# r2 = r
	movl	8(%ap),%r3		# r3 = a
	movl	12(%ap),%r4		# r4 = n
	movl	16(%ap),%r5		# r5 = w
	clrl	%r6			# carry = 0

	tstl	%r4			# sobgtr only tests after the body has
	bleq	9f			# run, so skip the loop when n <= 0

0:	emul	%r5,(%r3),(%r2),%r0	# r1:r0 = w * a[i] + r[i] (signed)

	# the addend was sign-extended: undo that when bit 31 of r[i] is set
	tstl	(%r2)
	bgeq	1f
	incl	%r1

1:	# fold in the carry left by the previous word
	addl2	%r6,%r0
	adwc	$0,%r1

	# signed -> unsigned product: add a[i] to the high word when w has
	# bit 31 set, and add w when a[i] has bit 31 set
	tstl	%r5
	bgeq	2f
	addl2	(%r3),%r1
2:	tstl	(%r3)
	bgeq	3f
	addl2	%r5,%r1
3:
	movl	%r0,(%r2)+		# r[i] = low word; advance r
	addl2	$4,%r3			# advance a
	movl	%r1,%r6			# high word is the next carry

	sobgtr	%r4,0b			# --n > 0 ? next word

9:	movl	%r6,%r0			# return the carry
	ret
51
52# .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64
53#;
54#; w.j.m. 15-jan-1999
55#;
56#; it's magic ...
57#;
58#; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
59#; ULONG c = 0;
60#; int i;
61#; for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ;
62#; return(c);
63#; }
64#
65
# bn_mul_words(r, a, n, w)
#
# Stores r[i] = low32(a[i] * w + carry) for i = 0 .. n-1; the high 32
# bits of each product become the carry into the next word.  The final
# carry is returned in r0.  A count of zero or less returns 0 and leaves
# r[] untouched.
#
# Registers: r2 = r cursor, r3 = a cursor, r4 = words left, r5 = w,
#            r6 = carry, r0/r1 = low/high word of the current product.

ENTRY(bn_mul_words,R6)
	movl	4(%ap),%r2		# r2 = r
	movl	8(%ap),%r3		# r3 = a
	movl	12(%ap),%r4		# r4 = n
	movl	16(%ap),%r5		# r5 = w
	clrl	%r6			# carry = 0

	tstl	%r4			# sobgtr only tests after the body has
	bleq	9f			# run, so skip the loop when n <= 0

0:	emul	%r5,(%r3),%r6,%r0	# r1:r0 = w * a[i] + carry (signed)

	# the carry was sign-extended: undo that when its bit 31 is set
	tstl	%r6
	bgeq	1f
	incl	%r1

1:	# signed -> unsigned product: add a[i] to the high word when w has
	# bit 31 set, and add w when a[i] has bit 31 set
	tstl	%r5
	bgeq	2f
	addl2	(%r3),%r1
2:	tstl	(%r3)
	bgeq	3f
	addl2	%r5,%r1

3:	movl	%r0,(%r2)+		# r[i] = low word; advance r
	addl2	$4,%r3			# advance a
	movl	%r1,%r6			# high word is the next carry

	sobgtr	%r4,0b			# --n > 0 ? next word

9:	movl	%r6,%r0			# return the carry
	ret
96
97
98
99# .title vax_bn_sqr_words unsigned square, 32*32=>64
100#;
101#; w.j.m. 15-jan-1999
102#;
103#; it's magic ...
104#;
105#; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
106#; int i;
107#; for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
108#; }
109#
110
# bn_sqr_words(r, a, n)
#
# Writes the 64-bit square of each a[i] to r[2*i] (low word) and
# r[2*i+1] (high word) for i = 0 .. n-1.  Nothing is returned.  A count
# of zero or less stores nothing.
#
# Registers: r2 = r cursor, r3 = a cursor, r4 = words left,
#            r5 = a[i], r0/r1 = low/high word of the square.

ENTRY(bn_sqr_words,0)
	movl	4(%ap),%r2		# r2 = r
	movl	8(%ap),%r3		# r3 = a
	movl	12(%ap),%r4		# r4 = n

	tstl	%r4			# sobgtr only tests after the body has
	bleq	9f			# run, so skip the loop when n <= 0

0:	movl	(%r3)+,%r5		# r5 = a[i]; advance a

	emul	%r5,%r5,$0,%r0		# r1:r0 = a[i] * a[i] (signed)

	# signed -> unsigned square: when bit 31 of a[i] is set the high
	# word is short by a[i] for each of the two factors
	tstl	%r5
	bgeq	1f
	addl2	%r5,%r1
	addl2	%r5,%r1

1:	movq	%r0,(%r2)+		# store low and high word; advance r

	sobgtr	%r4,0b			# --n > 0 ? next word

9:	ret
131
132
133# .title vax_bn_div_words unsigned divide
134#;
135#; Richard Levitte 20-Nov-2000
136#;
137#; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
138#; {
139#; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
140#; }
141#;
142#; Using EDIV would be very easy, if it didn't do signed calculations.
143#; Any time any of the input numbers are signed, there are problems,
144#; usually with integer overflow, at which point it returns useless
145#; data (the quotient gets the value of l, and the remainder becomes 0).
146#;
147#; If it was just for the dividend, it would be very easy, just divide
148#; it by 2 (unsigned), do the division, multiply the resulting quotient
149#; and remainder by 2, add the bit that was dropped when dividing by 2
150#; to the remainder, and do some adjustment so the remainder doesn't
151#; end up larger than the divisor. For some cases when the divisor is
152#; negative (from EDIV's point of view, i.e. when the highest bit is set),
153#; dividing the dividend by 2 isn't enough, and since some operations
154#; might generate integer overflows even when the dividend is divided by
155#; 4 (when the high part of the shifted down dividend ends up being exactly
156#; half of the divisor, the result is the quotient 0x80000000, which is
157#; negative...) it needs to be divided by 8. Furthermore, the divisor needs
158#; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
159#; In this case, a little extra fiddling with the remainder is required.
160#;
161#; So, the simplest way to handle this is always to divide the dividend
162#; by 8, and to divide the divisor by 2 if its highest bit is set.
163#; After EDIV has been used, the quotient gets multiplied by 8 if the
164#; original divisor was positive, otherwise 4. The remainder, oddly
165#; enough, is *always* multiplied by 8.
166#; NOTE: in the case mentioned above, where the high part of the shifted
167#; down dividend ends up being exactly half the shifted down divisor, we
168#; end up with a 33 bit quotient. That's no problem however, it usually
169#; means we have ended up with a too large remainder as well, and the
170#; problem is fixed by the last part of the algorithm (next paragraph).
171#;
172#; The routine ends with comparing the resulting remainder with the
173#; original divisor and if the remainder is larger, subtract the
174#; original divisor from it, and increase the quotient by 1. This is
175#; done until the remainder is smaller than the divisor.
176#;
177#; The complete algorithm looks like this:
178#;
179#; d' = d
180#; l' = l & 7
181#; [h,l] = [h,l] >> 3
182#; [q,r] = floor([h,l] / d) # This is the EDIV operation
183#; if (q < 0) q = -q # I doubt this is necessary any more
184#;
185#; r' = r >> 29
186#; if (d' >= 0)
187#; q' = q >> 29
188#; q = q << 3
189#; else
190#; q' = q >> 30
191#; q = q << 2
192#; r = (r << 3) + l'
193#;
194#; if (d' < 0 && (d' & 1))
195#; {
196#; [r',r] = [r',r] - [q',q]
197#; while ([r',r] < 0)
198#; {
199#; [r',r] = [r',r] + d'
200#; [q',q] = [q',q] - 1
201#; }
202#; }
203#;
204#; while ([r',r] >= d')
205#; {
206#; [r',r] = [r',r] - d'
207#; [q',q] = [q',q] + 1
208#; }
209#;
210#; return q
211#
212#;r2 = l, q
213#;r3 = h, r
214#;r4 = d
215#;r5 = l'
216#;r6 = r'
217#;r7 = d'
218#;r8 = q'
219#
220
# bn_div_words(h, l, d)
#
# Unsigned division of the 64-bit value h:l by d, built on the signed
# ediv instruction.  r0 receives the low quotient word (r2); the high
# quotient word kept in r8 and the remainder are not returned.  A zero
# divisor returns -1.
#
# Register roles:
#	r2 = l, later the quotient	r3 = h, later the remainder
#	r4 = divisor handed to ediv	r5 = low three bits taken from l
#	r6 = high part of remainder	r7 = unmodified copy of d
#	r8 = high part of quotient

ENTRY(bn_div_words,R6|R7|R8)
	movl	4(%ap),%r3		# r3 = h
	movl	8(%ap),%r2		# r2 = l
	movl	12(%ap),%r4		# r4 = d

	# shift h:l right by three, remembering the bits that leave l
	bicl3	$-8,%r2,%r5		# r5 = l & 7
	bicl3	$7,%r2,%r2		# clear those bits in l

	bicl3	$-8,%r3,%r6		# r6 = h & 7
	bicl3	$7,%r3,%r3		# clear those bits in h

	addl2	%r6,%r2			# park the low bits of h in l ...

	rotl	$-3,%r2,%r2		# ... so the rotate moves them to the top
	rotl	$-3,%r3,%r3		# h >>= 3 (its low bits are zero now)

	movl	%r4,%r7			# keep the original divisor

	clrl	%r6			# high remainder = 0
	clrl	%r8			# high quotient = 0

	tstl	%r4
	beql	16f			# zero divisor: return -1
	bgtr	10f
	rotl	$-1,%r4,%r4		# bit 31 set: halve the divisor (unsigned)
	bicl2	$0x80000000,%r4		# drop the bit that rotated round to the top
10:
	ediv	%r4,%r2,%r2,%r3		# r2 = quotient, r3 = remainder

	tstl	%r2
	bgeq	11f
	mnegl	%r2,%r2			# take the magnitude of a negative quotient
11:
	tstl	%r7
	blss	12f
	rotl	$3,%r2,%r2		# divisor unchanged: scale quotient by 8
	bicl3	$-8,%r2,%r8		# bits that wrapped round go to the high part
	bicl3	$7,%r2,%r2
	brb	13f

12:	# divisor was halved: scale quotient by 4 only
	rotl	$2,%r2,%r2
	bicl3	$-4,%r2,%r8		# bits that wrapped round go to the high part
	bicl3	$3,%r2,%r2
13:
	rotl	$3,%r3,%r3		# scale remainder by 8
	bicl3	$-8,%r3,%r6		# bits that wrapped round go to the high part
	bicl3	$7,%r3,%r3
	addl2	%r5,%r3			# put back the low bits of l

	# only when the original divisor has bit 31 and bit 0 set:
	# take the scaled quotient off the remainder
	tstl	%r7
	bgeq	17f
	bitl	$1,%r7
	beql	17f
	subl2	%r2,%r3
	sbwc	%r8,%r6
14:
	bgeq	17f			# loop while the high remainder is negative
	decl	%r2			# quotient -= 1 (64-bit)
	sbwc	$0,%r8
	addl2	%r7,%r3			# remainder += original divisor (64-bit)
	adwc	$0,%r6
	brb	14b

# Both exits sit in the middle so that every branch reaches them with a
# short displacement.
15:
	movl	%r2,%r0			# return the low quotient word
	ret
16:
	movl	$-1,%r0			# division by zero
	ret
17:
	tstl	%r6
	bneq	18f			# high remainder non-zero: still too large
	cmpl	%r3,%r7
	blssu	15b			# remainder < divisor (unsigned): done
18:
	subl2	%r7,%r3			# remainder -= original divisor (64-bit)
	sbwc	$0,%r6
	incl	%r2			# quotient += 1 (64-bit)
	adwc	$0,%r8
	brb	17b
307
308
309
310# .title vax_bn_add_words unsigned add of two arrays
311#;
312#; Richard Levitte 20-Nov-2000
313#;
314#; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
315#; ULONG c = 0;
316#; int i;
317#; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
318#; return(c);
319#; }
320#
321
# bn_add_words(r, a, b, n)
#
# r[i] = a[i] + b[i] + carry for i = 0 .. n-1; the carry out of the last
# word is returned in r0.  A count of zero or less returns 0 and stores
# nothing.
#
# The carry lives in the C condition code for the whole loop: tstl
# clears it on the way in, and only adwc changes it inside the loop.

ENTRY(bn_add_words,0)
	clrl	%r0			# result defaults to "no carry"
	movl	16(%ap),%r5		# r5 = n
	movl	12(%ap),%r4		# r4 = b
	movl	8(%ap),%r3		# r3 = a
	movl	4(%ap),%r2		# r2 = r

	tstl	%r5			# also leaves C = 0 for the first adwc
	bleq	9f			# nothing to add

2:	movl	(%r3)+,%r1		# r1 = a[i]		(C kept)
	adwc	(%r4)+,%r1		# r1 += b[i] + C	(C updated)
	movl	%r1,(%r2)+		# r[i] = r1		(C kept)
	sobgtr	%r5,2b			# next word		(C kept)

	adwc	$0,%r0			# r0 = final carry
9:	ret
339
340#;
341#; Richard Levitte 20-Nov-2000
342#;
343#; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
344#; ULONG c = 0;
345#; int i;
346#; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
347#; return(c);
348#; }
349#
350
# bn_sub_words(r, a, b, n)
#
# r[i] = a[i] - b[i] - borrow for i = 0 .. n-1; the borrow out of the
# last word is returned in r0.  A count of zero or less returns 0 and
# stores nothing.
#
# The borrow lives in the C condition code for the whole loop: tstl
# clears it on the way in, and only sbwc changes it inside the loop.

ENTRY(bn_sub_words,R6)
	clrl	%r0			# r0 = 0, the borrow is added at the end
	movl	16(%ap),%r5		# r5 = n
	movl	12(%ap),%r4		# r4 = b
	movl	8(%ap),%r3		# r3 = a
	movl	4(%ap),%r2		# r2 = r

	tstl	%r5			# also leaves C = 0 for the first sbwc
	bleq	9f			# nothing to subtract; C = 0 gives r0 = 0

2:	movl	(%r3)+,%r6		# r6 = a[i]		(C kept)
	sbwc	(%r4)+,%r6		# r6 -= b[i] + C	(C updated)
	movl	%r6,(%r2)+		# r[i] = r6		(C kept)
	sobgtr	%r5,2b			# next word		(C kept)

9:	adwc	$0,%r0			# r0 = final borrow
	ret
368
369#
370# Ragge 20-Sep-2003
371#
372# Multiply a vector of 4/8 longword by another.
373# Uses two loops and 16/64 emuls.
374#
375
# Fixed-size multiply and square: r = a * b (bn_mul_comba4/8) or
# r = a * a (bn_sqr_comba4/8) on vectors of four or eight longwords.
# The four entry points only differ in the word count loaded into r9 and
# in where the second operand comes from; they share one loop nest that
# accumulates one row a[] * b[i] per outer pass.
#
# Registers: r3 = a cursor, r7 = b cursor, r5 = r cursor, r9 = word
# count, r8 = rows left, r6 = words left in the row, r2 = b[i],
# r4 = carry, r0/r1 = low/high word of the current product.

ENTRY(bn_mul_comba4,R6|R7|R8|R9)
	movl	$4,%r9			# four words per operand
	brb	7f

ENTRY(bn_mul_comba8,R6|R7|R8|R9)
	movl	$8,%r9			# eight words per operand

7:	movl	8(%ap),%r3		# r3 = a
	movl	12(%ap),%r7		# r7 = b
	brb	8f

ENTRY(bn_sqr_comba4,R6|R7|R8|R9)
	movl	$4,%r9			# four words
	brb	6f

ENTRY(bn_sqr_comba8,R6|R7|R8|R9)
	movl	$8,%r9			# eight words

6:
	movl	8(%ap),%r3		# r3 = a
	movl	%r3,%r7			# second operand is a as well

8:	movl	4(%ap),%r5		# r5 = r
	movl	%r9,%r8			# one outer pass per word of b

	# The first row is added into r[], so clear the first eight
	# longwords of the destination up front.
	clrq	(%r5)
	clrq	8(%r5)
	clrq	16(%r5)			# only the eight-word variants need
	clrq	24(%r5)			# these two

0:	clrl	%r4			# each row starts without carry
	movl	%r9,%r6			# words left in this row
	movl	(%r7)+,%r2		# r2 = b[i]; advance b

1:	emul	%r2,(%r3),%r4,%r0	# r1:r0 = b[i] * a[j] + carry (signed)
	tstl	%r4			# undo sign extension of the carry
	bgeq	2f
	incl	%r1
2:	tstl	%r2			# bit 31 of b[i] set: high word += a[j]
	bgeq	3f
	addl2	(%r3),%r1
3:	tstl	(%r3)			# bit 31 of a[j] set: high word += b[i]
	bgeq	4f
	addl2	%r2,%r1

4:	addl2	%r0,(%r5)+		# accumulate the low word into r[]
	adwc	$0,%r1			# carry of that add goes to the high word
	movl	%r1,%r4			# which feeds the next emul
	addl2	$4,%r3			# advance a
	sobgtr	%r6,1b

	movl	%r4,(%r5)		# top word of the row is stored, not added

	# rewind a to its start and leave r one word past where this row began
	ashl	$2,%r9,%r4		# r4 = byte length of one operand
	subl2	%r4,%r3
	subl2	$4,%r4
	subl2	%r4,%r5

	sobgtr	%r8,0b			# next row

	ret