Move build machinery for libcrypto from libssl/crypto to libcrypto, as well as configuration files; split manpages and .pc files between libcrypto and libssl.

No functional change, only there to make engineering easier, and libcrypto sources are still found in libssl/src/crypto at the moment. ok reyk@, also discussed with deraadt@ beck@ and the usual crypto suspects.
author: miod <> 2014-04-11 22:51:54 +0000
committer: miod <> 2014-04-11 22:51:54 +0000
commit: e4e5cfce71aea090d747d436ab48c4717f78c97c (patch)
tree: afd4b98775fa2752df82417abdf319a3af0bb439 /src/lib/libcrypto/arch/vax/bn_asm_vax.S
parent: 3c70ae462fc747402d562cd98a2825922441cdda (diff)
download: openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.tar.gz
openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.tar.bz2
openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.zip
1 files changed, 436 insertions, 0 deletions
diff --git a/src/lib/libcrypto/arch/vax/bn_asm_vax.S b/src/lib/libcrypto/arch/vax/bn_asm_vax.S
new file mode 100644
index 0000000000..2969ae9dac
--- /dev/null
+++ b/src/lib/libcrypto/arch/vax/bn_asm_vax.S
@@ -0,0 +1,436 @@
+#       $OpenBSD: bn_asm_vax.S,v 1.1 2014/04/11 22:51:53 miod Exp $
+#       $NetBSD: bn_asm_vax.S,v 1.1 2003/11/03 10:22:28 ragge Exp $
+#include <machine/asm.h>
+# w.j.m. 15-jan-1999
+#
+# it's magic ...
+#
+# ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
+#       ULONG c = 0;
+#       int i;
+#       for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
+#       return c;
+# }
+ENTRY(bn_mul_add_words,R6)
+        movl    4(%ap),%r2              # r2 = r (accumulated in place)
+        movl    8(%ap),%r3              # r3 = a
+        movl    12(%ap),%r4             # r4 = n, longwords left to process
+        movl    16(%ap),%r5             # r5 = w
+        clrl    %r6                     # r6 = carry between words, returned at the end
+        # sobgtr decrements first and tests afterwards, so the loop body would
+        # still run once for n <= 0 and touch a[0]/r[0]; return 0 instead, as
+        # bn_add_words and bn_sub_words already do.
+        tstl    %r4
+        bleq    2f
+0:      emul    %r5,(%r3),(%r2),%r0     # signed w * a[0] + sign-extended r[0] -> r1:r0
+        # fixup for "negative" r[]: the sign extension put -1 in the high word
+        tstl    (%r2)
+        bgeq    1f
+        incl    %r1                     # add 1 to highword
+1:      # add saved carry to result
+        addl2   %r6,%r0
+        adwc    $0,%r1
+        # combined fixup for "negative" w, a[]: emul treats both factors as
+        # signed, so the high word is short by the other factor for each
+        # operand whose top bit is set
+        tstl    %r5             # if w is negative...
+        bgeq    1f
+        addl2   (%r3),%r1       # ...add a[0] again to highword
+1:      tstl    (%r3)           # if a[0] is negative...
+        bgeq    1f
+        addl2   %r5,%r1         # ...add w again to highword
+1:
+        movl    %r0,(%r2)+      # save low word in dest & advance *r
+        addl2   $4,%r3          # advance *a
+        movl    %r1,%r6         # high word becomes the carry for the next word
+        sobgtr  %r4,0b          # loop while --n > 0
+2:      movl    %r6,%r0         # return the final carry (0 when n <= 0)
+        ret
+#       .title  vax_bn_mul_words  unsigned multiply & add, 32*32+32=>64
+#;
+#; w.j.m. 15-jan-1999
+#;
+#; it's magic ...
+#;
+#; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
+#;      ULONG c = 0;
+#;      int i;
+#;      for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ;
+#;      return(c);
+#; }
+#
+ENTRY(bn_mul_words,R6)
+        movl    4(%ap),%r2              # r2 = r (destination)
+        movl    8(%ap),%r3              # r3 = a
+        movl    12(%ap),%r4             # r4 = n, longwords left to process
+        movl    16(%ap),%r5             # r5 = w
+        clrl    %r6                     # r6 = carry between words, returned at the end
+        # sobgtr decrements first and tests afterwards, so the loop body would
+        # still run once for n <= 0 and touch a[0]/r[0]; return 0 instead, as
+        # bn_add_words and bn_sub_words already do.
+        tstl    %r4
+        bleq    2f
+0:      emul    %r5,(%r3),%r6,%r0       # signed w * a[0] + sign-extended carry -> r1:r0
+        # fixup for "negative" carry: the sign extension put -1 in the high word
+        tstl    %r6
+        bgeq    1f
+        incl    %r1
+1:      # combined fixup for "negative" w, a[]: emul treats both factors as
+        # signed, so add the other factor to the high word for each operand
+        # whose top bit is set
+        tstl    %r5
+        bgeq    1f
+        addl2   (%r3),%r1       # w "negative": add a[0] to highword
+1:      tstl    (%r3)
+        bgeq    1f
+        addl2   %r5,%r1         # a[0] "negative": add w to highword
+1:      movl    %r0,(%r2)+      # store low word & advance *r
+        addl2   $4,%r3          # advance *a
+        movl    %r1,%r6         # high word becomes the carry for the next word
+        sobgtr  %r4,0b          # loop while --n > 0
+2:      movl    %r6,%r0         # return the final carry (0 when n <= 0)
+        ret
+#       .title  vax_bn_sqr_words  unsigned square, 32*32=>64
+#;
+#; w.j.m. 15-jan-1999
+#;
+#; it's magic ...
+#;
+#; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
+#;      int i;
+#;      for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
+#; }
+#
+ENTRY(bn_sqr_words,0)
+        movl    4(%ap),%r2              # r2 = r (two longwords written per input word)
+        movl    8(%ap),%r3              # r3 = a
+        movl    12(%ap),%r4             # r4 = n, longwords left to process
+        # sobgtr decrements first and tests afterwards, so the loop body would
+        # still run once for n <= 0 and store 8 bytes into r; skip the loop
+        # instead, as bn_add_words and bn_sub_words already do.
+        tstl    %r4
+        bleq    2f
+0:      movl    (%r3)+,%r5              # r5 = a[] & advance
+        emul    %r5,%r5,$0,%r0          # signed a[0] * a[0] + 0 -> r1:r0
+        # fixup for "negative" a[]: both factors are the same value, so the
+        # high word is short by a[0] twice when its top bit is set
+        tstl    %r5
+        bgeq    1f
+        addl2   %r5,%r1
+        addl2   %r5,%r1
+1:      movq    %r0,(%r2)+              # store 64-bit result
+        sobgtr  %r4,0b                  # loop while --n > 0
+2:      ret
+#       .title  vax_bn_div_words  unsigned divide
+#;
+#; Richard Levitte 20-Nov-2000
+#;
+#; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
+#; {
+#;      return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
+#; }
+#;
+#; Using EDIV would be very easy, if it didn't do signed calculations.
+#; Any time any of the input numbers are signed, there are problems,
+#; usually with integer overflow, at which point it returns useless
+#; data (the quotient gets the value of l, and the remainder becomes 0).
+#;
+#; If it was just for the dividend, it would be very easy, just divide
+#; it by 2 (unsigned), do the division, multiply the resulting quotient
+#; and remainder by 2, add the bit that was dropped when dividing by 2
+#; to the remainder, and do some adjustment so the remainder doesn't
+#; end up larger than the divisor.  For some cases when the divisor is
+#; negative (from EDIV's point of view, i.e. when the highest bit is set),
+#; dividing the dividend by 2 isn't enough, and since some operations
+#; might generate integer overflows even when the dividend is divided by
+#; 4 (when the high part of the shifted down dividend ends up being exactly
+#; half of the divisor, the result is the quotient 0x80000000, which is
+#; negative...) it needs to be divided by 8.  Furthermore, the divisor needs
+#; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
+#; In this case, a little extra fiddling with the remainder is required.
+#;
+#; So, the simplest way to handle this is always to divide the dividend
+#; by 8, and to divide the divisor by 2 if its highest bit is set.
+#; After EDIV has been used, the quotient gets multiplied by 8 if the
+#; original divisor was positive, otherwise 4.  The remainder, oddly
+#; enough, is *always* multiplied by 8.
+#; NOTE: in the case mentioned above, where the high part of the shifted
+#; down dividend ends up being exactly half the shifted down divisor, we
+#; end up with a 33 bit quotient.  That's no problem however, it usually
+#; means we have ended up with a too large remainder as well, and the
+#; problem is fixed by the last part of the algorithm (next paragraph).
+#;
+#; The routine ends with comparing the resulting remainder with the
+#; original divisor and if the remainder is larger, subtract the
+#; original divisor from it, and increase the quotient by 1.  This is
+#; done until the remainder is smaller than the divisor.
+#;
+#; The complete algorithm looks like this:
+#;
+#; d'    = d
+#; l'    = l & 7
+#; [h,l] = [h,l] >> 3
+#; [q,r] = floor([h,l] / d)     # This is the EDIV operation
+#; if (q < 0) q = -q            # I doubt this is necessary any more
+#;
+#; r'    = r >> 29
+#; if (d' >= 0)
+#;   q'  = q >> 29
+#;   q   = q << 3
+#; else
+#;   q'  = q >> 30
+#;   q   = q << 2
+#; r     = (r << 3) + l'
+#;
+#; if (d' < 0 && (d' & 1))
+#;   {
+#;     [r',r] = [r',r] - [q',q]
+#;     while ([r',r] < 0)
+#;       {
+#;         [r',r] = [r',r] + d'
+#;         [q',q] = [q',q] - 1
+#;       }
+#;   }
+#;
+#; while ([r',r] >= d')
+#;   {
+#;     [r',r] = [r',r] - d'
+#;     [q',q] = [q',q] + 1
+#;   }
+#;
+#; return q
+#
+#;r2 = l, q
+#;r3 = h, r
+#;r4 = d
+#;r5 = l'
+#;r6 = r'
+#;r7 = d'
+#;r8 = q'
+#
+ENTRY(bn_div_words,R6|R7|R8)
+        movl    4(%ap),%r3              # h: high longword of the dividend [h,l]
+        movl    8(%ap),%r2              # l: low longword of the dividend
+        movl    12(%ap),%r4             # d: divisor
+        bicl3   $-8,%r2,%r5             # l' = l & 7 (bits dropped by the >> 3 below)
+        bicl3   $7,%r2,%r2              # clear the low 3 bits of l
+        bicl3   $-8,%r3,%r6             # r6 = h & 7 (temporary use of r6)
+        bicl3   $7,%r3,%r3              # clear the low 3 bits of h
+        addl2   %r6,%r2                 # put h's low 3 bits into l's cleared low bits
+        rotl    $-3,%r2,%r2             # l = l >> 3 (h's low bits rotate into the top)
+        rotl    $-3,%r3,%r3             # h = h >> 3
+        movl    %r4,%r7                 # d' = d (unshifted copy of the divisor)
+        clrl    %r6                     # r' = 0
+        clrl    %r8                     # q' = 0
+        tstl    %r4
+        beql    0f                      # Uh-oh, the divisor is 0... (returns -1)
+        bgtr    1f                      # d positive: divide by d unchanged
+        rotl    $-1,%r4,%r4     # If d is negative, shift it right.
+        bicl2   $0x80000000,%r4 # Since d is then a large number, the
+                                # lowest bit is insignificant
+                                # (contradict that, and I'll fix the problem!)
+1:
+        ediv    %r4,%r2,%r2,%r3         # Do the actual division: q -> r2, r -> r3
+        tstl    %r2
+        bgeq    1f
+        mnegl   %r2,%r2         # if q < 0, negate it
+1:
+        tstl    %r7             # if (d' >= 0)
+        blss    1f
+        rotl    $3,%r2,%r2      #   q = q << 3
+        bicl3   $-8,%r2,%r8     #   q' gets the high bits from q
+        bicl3   $7,%r2,%r2      #   clear the bits that rotated around into q
+        brb     2f
+1:                              # else
+        rotl    $2,%r2,%r2      #   q = q << 2
+        bicl3   $-4,%r2,%r8     #   q' gets the high bits from q
+        bicl3   $3,%r2,%r2      #   clear the bits that rotated around into q
+2:
+        rotl    $3,%r3,%r3      # r = r << 3
+        bicl3   $-8,%r3,%r6     # r' gets the high bits from r
+        bicl3   $7,%r3,%r3      # clear the bits that rotated around into r
+        addl2   %r5,%r3         # r = r + l'
+        tstl    %r7
+        bgeq    5f              # d' >= 0: go straight to the final adjustment
+        bitl    $1,%r7
+        beql    5f              # if d' < 0 && d' & 1
+        subl2   %r2,%r3         #   [r',r] = [r',r] - [q',q]
+        sbwc    %r8,%r6         #   (high words, with borrow)
+3:
+        bgeq    5f              #   while [r',r] < 0 (sign of r' from the sbwc/adwc just before)
+        decl    %r2             #     [q',q] = [q',q] - 1
+        sbwc    $0,%r8          #     (borrow into q')
+        addl2   %r7,%r3         #     [r',r] = [r',r] + d'
+        adwc    $0,%r6          #     (carry into r')
+        brb     3b
+# The return points are placed in the middle to keep a short distance from
+# all the branch points
+1:
+#       movl    %r3,%r1
+        movl    %r2,%r0         # return q (r2 only; q' in r8 is not returned)
+        ret
+0:
+        movl    $-1,%r0         # divisor was 0: return -1
+        ret
+5:
+        tstl    %r6
+        bneq    6f              # r' != 0: [r',r] is larger than any 32-bit d'
+        cmpl    %r3,%r7
+        blssu   1b              # while [r',r] >= d' (unsigned); done once r < d'
+6:
+        subl2   %r7,%r3         #   [r',r] = [r',r] - d'
+        sbwc    $0,%r6          #   (borrow into r')
+        incl    %r2             #   [q',q] = [q',q] + 1
+        adwc    $0,%r8          #   (carry into q')
+        brb     5b
+#       .title  vax_bn_add_words  unsigned add of two arrays
+#;
+#; Richard Levitte 20-Nov-2000
+#;
+#; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
+#;      ULONG c = 0;
+#;      int i;
+#;      for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
+#;      return(c);
+#; }
+#
+ENTRY(bn_add_words,0)
+        movl    4(%ap),%r2      # r: destination array
+        movl    8(%ap),%r3      # a: first addend array
+        movl    12(%ap),%r4     # b: second addend array
+        movl    16(%ap),%r5     # n: number of longwords
+        clrl    %r0             # return value, stays 0 unless a carry comes out
+        tstl    %r5             # test n; tstl also clears C, seeding the carry chain
+        bleq    1f              # n <= 0: return 0 without touching the arrays
+0:      movl    (%r3)+,%r1      # r1 = a[i]; carry untouched
+        adwc    (%r4)+,%r1      # r1 += b[i] + C; carry used and touched
+        movl    %r1,(%r2)+      # r[i] = r1; carry untouched
+        sobgtr  %r5,0b          # loop while --n > 0; carry untouched
+        adwc    $0,%r0          # r0 = carry out of the last word
+1:      ret
+#;
+#; Richard Levitte 20-Nov-2000
+#;
+#; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
+#;      ULONG c = 0;
+#;      int i;
+#;      for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
+#;      return(c);
+#; }
+#
+ENTRY(bn_sub_words,R6)
+        movl    4(%ap),%r2      # r: destination array
+        movl    8(%ap),%r3      # a: minuend array
+        movl    12(%ap),%r4     # b: subtrahend array
+        movl    16(%ap),%r5     # n: number of longwords
+        clrl    %r0             # return value, stays 0 unless a borrow comes out
+        tstl    %r5             # test n; tstl also clears C, seeding the borrow chain
+        bleq    1f              # n <= 0: skip the loop (C is clear, so 0 is returned)
+0:      movl    (%r3)+,%r6      # r6 = a[i]; carry untouched
+        sbwc    (%r4)+,%r6      # r6 -= b[i] + C; carry used and touched
+        movl    %r6,(%r2)+      # r[i] = r6; carry untouched
+        sobgtr  %r5,0b          # loop while --n > 0; carry untouched
+1:      adwc    $0,%r0          # r0 = borrow out of the last word
+        ret
+#
+#       Ragge 20-Sep-2003
+#
+#       Multiply a vector of 4/8 longword by another.
+#       Uses two loops and 16/64 emuls.
+#
+ENTRY(bn_mul_comba4,R6|R7|R8|R9)
+        movl    $4,%r9          # 4*4: r9 = vector length in longwords
+        brb     6f              # join the common multiply setup
+ENTRY(bn_mul_comba8,R6|R7|R8|R9)
+        movl    $8,%r9          # 8*8: r9 = vector length in longwords
+6:      movl    8(%ap),%r3      # a[]
+        movl    12(%ap),%r7     # b[]
+        brb     5f              # join the shared multiply loop
+ENTRY(bn_sqr_comba4,R6|R7|R8|R9)
+        movl    $4,%r9          # 4*4: r9 = vector length in longwords
+        brb 0f
+ENTRY(bn_sqr_comba8,R6|R7|R8|R9)
+        movl    $8,%r9          # 8*8: r9 = vector length in longwords
+0:
+        movl    8(%ap),%r3      # a[]
+        movl    %r3,%r7         # a[] again: squaring multiplies a by itself
+5:      movl    4(%ap),%r5      # r[]
+        movl    %r9,%r8         # r8 = outer loop count
+        clrq    (%r5)           # clear destination, for add.
+        clrq    8(%r5)
+        clrq    16(%r5)         # these only needed for comba8
+        clrq    24(%r5)
+2:      clrl    %r4             # carry
+        movl    %r9,%r6         # inner loop count
+        movl    (%r7)+,%r2      # value to multiply with
+1:      emul    %r2,(%r3),%r4,%r0       # signed r2 * a[0] + sign-extended carry -> r1:r0
+        tstl    %r4             # fixup for "negative" carry
+        bgeq    3f
+        incl    %r1
+3:      tstl    %r2             # fixup for "negative" multiplier
+        bgeq    3f
+        addl2   (%r3),%r1
+3:      tstl    (%r3)           # fixup for "negative" a[]
+        bgeq    3f
+        addl2   %r2,%r1
+3:      addl2   %r0,(%r5)+      # add to destination
+        adwc    $0,%r1          # remember carry
+        movl    %r1,%r4         # add carry in next emul
+        addl2   $4,%r3          # advance a[]
+        sobgtr  %r6,1b          # inner loop over the words of a[]
+        movl    %r4,(%r5)       # save highest add result
+        ashl    $2,%r9,%r4      # r4 = vector length in bytes
+        subl2   %r4,%r3         # rewind a[] to its first word
+        subl2   $4,%r4
+        subl2   %r4,%r5         # r[] now one word past where this pass started
+        sobgtr  %r8,2b          # outer loop over the multiplier words
+        ret
author	miod <>	2014-04-11 22:51:54 +0000
committer	miod <>	2014-04-11 22:51:54 +0000
commit	e4e5cfce71aea090d747d436ab48c4717f78c97c (patch)
tree	afd4b98775fa2752df82417abdf319a3af0bb439 /src/lib/libcrypto/arch/vax/bn_asm_vax.S
parent	3c70ae462fc747402d562cd98a2825922441cdda (diff)
download	openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.tar.gz openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.tar.bz2 openbsd-e4e5cfce71aea090d747d436ab48c4717f78c97c.zip

diff --git a/src/lib/libcrypto/arch/vax/bn_asm_vax.S b/src/lib/libcrypto/arch/vax/bn_asm_vax.S new file mode 100644 index 0000000000..2969ae9dac --- /dev/null +++ b/src/lib/libcrypto/arch/vax/bn_asm_vax.S
@@ -0,0 +1,436 @@
	1	# $OpenBSD: bn_asm_vax.S,v 1.1 2014/04/11 22:51:53 miod Exp $
	2	# $NetBSD: bn_asm_vax.S,v 1.1 2003/11/03 10:22:28 ragge Exp $
	3
	4	#include <machine/asm.h>
	5
	6	# w.j.m. 15-jan-1999
	7	#
	8	# it's magic ...
	9	#
	10	# ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
	11	# ULONG c = 0;
	12	# int i;
	13	# for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
	14	# return c;
	15	# }
	16
	17	ENTRY(bn_mul_add_words,R6)
	18	movl 4(%ap),%r2 # *r
	19	movl 8(%ap),%r3 # *a
	20	movl 12(%ap),%r4 # n
	21	movl 16(%ap),%r5 # w
	22	clrl %r6 # return value ("carry")
	23
	24	0: emul %r5,(%r3),(%r2),%r0 # w * a[0] + r[0] -> r0
	25
	26	# fixup for "negative" r[]
	27	tstl (%r2)
	28	bgeq 1f
	29	incl %r1 # add 1 to highword
	30
	31	1: # add saved carry to result
	32	addl2 %r6,%r0
	33	adwc $0,%r1
	34
	35	# combined fixup for "negative" w, a[]
	36	tstl %r5 # if w is negative...
	37	bgeq 1f
	38	addl2 (%r3),%r1 # ...add a[0] again to highword
	39	1: tstl (%r3) # if a[0] is negative...
	40	bgeq 1f
	41	addl2 %r5,%r1 # ...add w again to highword
	42	1:
	43	movl %r0,(%r2)+ # save low word in dest & advance *r
	44	addl2 $4,%r3 # advance *a
	45	movl %r1,%r6 # high word in r6 for return value
	46
	47	sobgtr %r4,0b # loop?
	48
	49	movl %r6,%r0
	50	ret
	51
	52	# .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64
	53	#;
	54	#; w.j.m. 15-jan-1999
	55	#;
	56	#; it's magic ...
	57	#;
	58	#; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
	59	#; ULONG c = 0;
	60	#; int i;
	61	#; for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ;
	62	#; return(c);
	63	#; }
	64	#
	65
	66	ENTRY(bn_mul_words,R6)
	67	movl 4(%ap),%r2 # *r
	68	movl 8(%ap),%r3 # *a
	69	movl 12(%ap),%r4 # n
	70	movl 16(%ap),%r5 # w
	71	clrl %r6 # carry
	72
	73	0: emul %r5,(%r3),%r6,%r0 # w * a[0] + carry -> r0
	74
	75	# fixup for "negative" carry
	76	tstl %r6
	77	bgeq 1f
	78	incl %r1
	79
	80	1: # combined fixup for "negative" w, a[]
	81	tstl %r5
	82	bgeq 1f
	83	addl2 (%r3),%r1
	84	1: tstl (%r3)
	85	bgeq 1f
	86	addl2 %r5,%r1
	87
	88	1: movl %r0,(%r2)+
	89	addl2 $4,%r3
	90	movl %r1,%r6
	91
	92	sobgtr %r4,0b
	93
	94	movl %r6,%r0
	95	ret
	96
	97
	98
	99	# .title vax_bn_sqr_words unsigned square, 32*32=>64
	100	#;
	101	#; w.j.m. 15-jan-1999
	102	#;
	103	#; it's magic ...
	104	#;
	105	#; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
	106	#; int i;
	107	#; for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
	108	#; }
	109	#
	110
	111	ENTRY(bn_sqr_words,0)
	112	movl 4(%ap),%r2 # r
	113	movl 8(%ap),%r3 # a
	114	movl 12(%ap),%r4 # n
	115
	116	0: movl (%r3)+,%r5 # r5 = a[] & advance
	117
	118	emul %r5,%r5,$0,%r0 # a[0] * a[0] + 0 -> r0
	119
	120	# fixup for "negative" a[]
	121	tstl %r5
	122	bgeq 1f
	123	addl2 %r5,%r1
	124	addl2 %r5,%r1
	125
	126	1: movq %r0,(%r2)+ # store 64-bit result
	127
	128	sobgtr %r4,0b # loop
	129
	130	ret
	131
	132
	133	# .title vax_bn_div_words unsigned divide
	134	#;
	135	#; Richard Levitte 20-Nov-2000
	136	#;
	137	#; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
	138	#; {
	139	#; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
	140	#; }
	141	#;
	142	#; Using EDIV would be very easy, if it didn't do signed calculations.
	143	#; Any time any of the input numbers are signed, there are problems,
	144	#; usually with integer overflow, at which point it returns useless
	145	#; data (the quotient gets the value of l, and the remainder becomes 0).
	146	#;
	147	#; If it was just for the dividend, it would be very easy, just divide
	148	#; it by 2 (unsigned), do the division, multiply the resulting quotient
	149	#; and remainder by 2, add the bit that was dropped when dividing by 2
	150	#; to the remainder, and do some adjustment so the remainder doesn't
	151	#; end up larger than the divisor. For some cases when the divisor is
	152	#; negative (from EDIV's point of view, i.e. when the highest bit is set),
	153	#; dividing the dividend by 2 isn't enough, and since some operations
	154	#; might generate integer overflows even when the dividend is divided by
	155	#; 4 (when the high part of the shifted down dividend ends up being exactly
	156	#; half of the divisor, the result is the quotient 0x80000000, which is
	157	#; negative...) it needs to be divided by 8. Furthermore, the divisor needs
	158	#; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
	159	#; In this case, a little extra fiddling with the remainder is required.
	160	#;
	161	#; So, the simplest way to handle this is always to divide the dividend
	162	#; by 8, and to divide the divisor by 2 if its highest bit is set.
	163	#; After EDIV has been used, the quotient gets multiplied by 8 if the
	164	#; original divisor was positive, otherwise 4. The remainder, oddly
	165	#; enough, is always multiplied by 8.
	166	#; NOTE: in the case mentioned above, where the high part of the shifted
	167	#; down dividend ends up being exactly half the shifted down divisor, we
	168	#; end up with a 33 bit quotient. That's no problem however, it usually
	169	#; means we have ended up with a too large remainder as well, and the
	170	#; problem is fixed by the last part of the algorithm (next paragraph).
	171	#;
	172	#; The routine ends with comparing the resulting remainder with the
	173	#; original divisor and if the remainder is larger, subtract the
	174	#; original divisor from it, and increase the quotient by 1. This is
	175	#; done until the remainder is smaller than the divisor.
	176	#;
	177	#; The complete algorithm looks like this:
	178	#;
	179	#; d' = d
	180	#; l' = l & 7
	181	#; [h,l] = [h,l] >> 3
	182	#; [q,r] = floor([h,l] / d) # This is the EDIV operation
	183	#; if (q < 0) q = -q # I doubt this is necessary any more
	184	#;
	185	#; r' = r >> 29
	186	#; if (d' >= 0)
	187	#; q' = q >> 29
	188	#; q = q << 3
	189	#; else
	190	#; q' = q >> 30
	191	#; q = q << 2
	192	#; r = (r << 3) + l'
	193	#;
	194	#; if (d' < 0 && (d' & 1))
	195	#; {
	196	#; [r',r] = [r',r] - [q',q]
	197	#; while ([r',r] < 0)
	198	#; {
	199	#; [r',r] = [r',r] + d'
	200	#; [q',q] = [q',q] - 1
	201	#; }
	202	#; }
	203	#;
	204	#; while ([r',r] >= d')
	205	#; {
	206	#; [r',r] = [r',r] - d'
	207	#; [q',q] = [q',q] + 1
	208	#; }
	209	#;
	210	#; return q
	211	#
	212	#;r2 = l, q
	213	#;r3 = h, r
	214	#;r4 = d
	215	#;r5 = l'
	216	#;r6 = r'
	217	#;r7 = d'
	218	#;r8 = q'
	219	#
	220
	221	ENTRY(bn_div_words,R6\|R7\|R8)
	222	movl 4(%ap),%r3 # h
	223	movl 8(%ap),%r2 # l
	224	movl 12(%ap),%r4 # d
	225
	226	bicl3 $-8,%r2,%r5 # l' = l & 7
	227	bicl3 $7,%r2,%r2
	228
	229	bicl3 $-8,%r3,%r6
	230	bicl3 $7,%r3,%r3
	231
	232	addl2 %r6,%r2
	233
	234	rotl $-3,%r2,%r2 # l = l >> 3
	235	rotl $-3,%r3,%r3 # h = h >> 3
	236
	237	movl %r4,%r7 # d' = d
	238
	239	clrl %r6 # r' = 0
	240	clrl %r8 # q' = 0
	241
	242	tstl %r4
	243	beql 0f # Uh-oh, the divisor is 0...
	244	bgtr 1f
	245	rotl $-1,%r4,%r4 # If d is negative, shift it right.
	246	bicl2 $0x80000000,%r4 # Since d is then a large number, the
	247	# lowest bit is insignificant
	248	# (contradict that, and I'll fix the problem!)
	249	1:
	250	ediv %r4,%r2,%r2,%r3 # Do the actual division
	251
	252	tstl %r2
	253	bgeq 1f
	254	mnegl %r2,%r2 # if q < 0, negate it
	255	1:
	256	tstl %r7
	257	blss 1f
	258	rotl $3,%r2,%r2 # q = q << 3
	259	bicl3 $-8,%r2,%r8 # q' gets the high bits from q
	260	bicl3 $7,%r2,%r2
	261	brb 2f
	262
	263	1: # else
	264	rotl $2,%r2,%r2 # q = q << 2
	265	bicl3 $-4,%r2,%r8 # q' gets the high bits from q
	266	bicl3 $3,%r2,%r2
	267	2:
	268	rotl $3,%r3,%r3 # r = r << 3
	269	bicl3 $-8,%r3,%r6 # r' gets the high bits from r
	270	bicl3 $7,%r3,%r3
	271	addl2 %r5,%r3 # r = r + l'
	272
	273	tstl %r7
	274	bgeq 5f
	275	bitl $1,%r7
	276	beql 5f # if d' < 0 && d' & 1
	277	subl2 %r2,%r3 # [r',r] = [r',r] - [q',q]
	278	sbwc %r8,%r6
	279	3:
	280	bgeq 5f # while r < 0
	281	decl %r2 # [q',q] = [q',q] - 1
	282	sbwc $0,%r8
	283	addl2 %r7,%r3 # [r',r] = [r',r] + d'
	284	adwc $0,%r6
	285	brb 3b
	286
	287	# The return points are placed in the middle to keep a short distance from
	288	# all the branch points
	289	1:
	290	# movl %r3,%r1
	291	movl %r2,%r0
	292	ret
	293	0:
	294	movl $-1,%r0
	295	ret
	296	5:
	297	tstl %r6
	298	bneq 6f
	299	cmpl %r3,%r7
	300	blssu 1b # while [r',r] >= d'
	301	6:
	302	subl2 %r7,%r3 # [r',r] = [r',r] - d'
	303	sbwc $0,%r6
	304	incl %r2 # [q',q] = [q',q] + 1
	305	adwc $0,%r8
	306	brb 5b
	307
	308
	309
	310	# .title vax_bn_add_words unsigned add of two arrays
	311	#;
	312	#; Richard Levitte 20-Nov-2000
	313	#;
	314	#; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
	315	#; ULONG c = 0;
	316	#; int i;
	317	#; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
	318	#; return(c);
	319	#; }
	320	#
	321
	322	ENTRY(bn_add_words,0)
	323	movl 4(%ap),%r2 # r
	324	movl 8(%ap),%r3 # a
	325	movl 12(%ap),%r4 # b
	326	movl 16(%ap),%r5 # n
	327	clrl %r0
	328
	329	tstl %r5
	330	bleq 1f
	331
	332	0: movl (%r3)+,%r1 # carry untouched
	333	adwc (%r4)+,%r1 # carry used and touched
	334	movl %r1,(%r2)+ # carry untouched
	335	sobgtr %r5,0b # carry untouched
	336
	337	adwc $0,%r0
	338	1: ret
	339
	340	#;
	341	#; Richard Levitte 20-Nov-2000
	342	#;
	343	#; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
	344	#; ULONG c = 0;
	345	#; int i;
	346	#; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
	347	#; return(c);
	348	#; }
	349	#
	350
	351	ENTRY(bn_sub_words,R6)
	352	movl 4(%ap),%r2 # r
	353	movl 8(%ap),%r3 # a
	354	movl 12(%ap),%r4 # b
	355	movl 16(%ap),%r5 # n
	356	clrl %r0
	357
	358	tstl %r5
	359	bleq 1f
	360
	361	0: movl (%r3)+,%r6 # carry untouched
	362	sbwc (%r4)+,%r6 # carry used and touched
	363	movl %r6,(%r2)+ # carry untouched
	364	sobgtr %r5,0b # carry untouched
	365
	366	1: adwc $0,%r0
	367	ret
	368
	369	#
	370	# Ragge 20-Sep-2003
	371	#
	372	# Multiply a vector of 4/8 longword by another.
	373	# Uses two loops and 16/64 emuls.
	374	#
	375
	376	ENTRY(bn_mul_comba4,R6\|R7\|R8\|R9)
	377	movl $4,%r9 # 4*4
	378	brb 6f
	379
	380	ENTRY(bn_mul_comba8,R6\|R7\|R8\|R9)
	381	movl $8,%r9 # 8*8
	382
	383	6: movl 8(%ap),%r3 # a[]
	384	movl 12(%ap),%r7 # b[]
	385	brb 5f
	386
	387	ENTRY(bn_sqr_comba4,R6\|R7\|R8\|R9)
	388	movl $4,%r9 # 4*4
	389	brb 0f
	390
	391	ENTRY(bn_sqr_comba8,R6\|R7\|R8\|R9)
	392	movl $8,%r9 # 8*8
	393
	394	0:
	395	movl 8(%ap),%r3 # a[]
	396	movl %r3,%r7 # a[]
	397
	398	5: movl 4(%ap),%r5 # r[]
	399	movl %r9,%r8
	400
	401	clrq (%r5) # clear destinatino, for add.
	402	clrq 8(%r5)
	403	clrq 16(%r5) # these only needed for comba8
	404	clrq 24(%r5)
	405
	406	2: clrl %r4 # carry
	407	movl %r9,%r6 # inner loop count
	408	movl (%r7)+,%r2 # value to multiply with
	409
	410	1: emul %r2,(%r3),%r4,%r0
	411	tstl %r4
	412	bgeq 3f
	413	incl %r1
	414	3: tstl %r2
	415	bgeq 3f
	416	addl2 (%r3),%r1
	417	3: tstl (%r3)
	418	bgeq 3f
	419	addl2 %r2,%r1
	420
	421	3: addl2 %r0,(%r5)+ # add to destination
	422	adwc $0,%r1 # remember carry
	423	movl %r1,%r4 # add carry in next emul
	424	addl2 $4,%r3
	425	sobgtr %r6,1b
	426
	427	movl %r4,(%r5) # save highest add result
	428
	429	ashl $2,%r9,%r4
	430	subl2 %r4,%r3
	431	subl2 $4,%r4
	432	subl2 %r4,%r5
	433
	434	sobgtr %r8,2b
	435
	436	ret