OpenSSL 0.9.7 stable 2002 05 08 merge

author: beck <> 2002-05-15 02:29:21 +0000
committer: beck <> 2002-05-15 02:29:21 +0000
commit: b64270d1e45fe7f3241e4c9b6ce60d5ac89bc2e9 (patch)
tree: fa27cf82a1250b64ed3bf5f4a18c7354d470bbcc /src/lib/libcrypto/bn/asm/vms.mar
parent: e471e1ea98d673597b182ea85f29e30c97cd08b5 (diff)
download: openbsd-b64270d1e45fe7f3241e4c9b6ce60d5ac89bc2e9.tar.gz
openbsd-b64270d1e45fe7f3241e4c9b6ce60d5ac89bc2e9.tar.bz2
openbsd-b64270d1e45fe7f3241e4c9b6ce60d5ac89bc2e9.zip
1 files changed, 207 insertions, 490 deletions
diff --git a/src/lib/libcrypto/bn/asm/vms.mar b/src/lib/libcrypto/bn/asm/vms.mar
index ac9d57d7b0..465f2774b6 100644
--- a/src/lib/libcrypto/bn/asm/vms.mar
+++ b/src/lib/libcrypto/bn/asm/vms.mar
@@ -162,442 +162,237 @@ n=12 ;(AP)	n	by value (input)
        movl    #1,r0                   ; return SS$_NORMAL
        ret
-        .title  (generated)
+        .title  vax_bn_div_words  unsigned divide
+;
-        .psect  code,nowrt
+; Richard Levitte 20-Nov-2000
+;
-.entry  BN_DIV_WORDS,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10>
+; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
-        subl2   #4,sp
+; {
+;       return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
-        clrl    r9
+; }
-        movl    #2,r8
+;
+; Using EDIV would be very easy, if it didn't do signed calculations.
-        tstl    12(ap)
+; Therefore, some extra things have to happen around it.  The way to
-        bneq    noname.2
+; handle that is to shift all operands right one step (basically dividing
-        mnegl   #1,r10
+; them by 2) and handle the different cases depending on what the lowest
-        brw     noname.3
+; bit of each operand was.
-        tstl    r0
+;
-        nop     
+; To start with, let's define the following:
-noname.2:
+;
+; a' = l & 1
-        pushl   12(ap)
+; a2 = <h,l> >> 1       # UNSIGNED shift!
-        calls   #1,BN_NUM_BITS_WORD
+; b' = d & 1
-        movl    r0,r7
+; b2 = d >> 1           # UNSIGNED shift!
+;
-        cmpl    r7,#32
+; Now, use EDIV to calculate a quotient and a remainder:
-        beql    noname.4
+;
-        ashl    r7,#1,r2
+; q'' = a2/b2
-        cmpl    4(ap),r2
+; r'' = a2 - q''*b2
-        blequ   noname.4
+;
+; If b' is 0, the quotient is already correct, we just need to adjust the
-        pushl   r7
+; remainder:
-        calls   #1,BN_DIV_WORDS_ABORT
+;
-noname.4:
+; if (b' == 0)
+;   {
-        subl3   r7,#32,r7
+;     r = 2*r'' + a'
+;     q = q''
-        movl    12(ap),r2
+;   }
-        cmpl    4(ap),r2
+;
-        blssu   noname.5
+; If b' is 1, we need to do other adjustments.  The first thought is the
-        subl2   r2,4(ap)
+; following (note that r' will not always have the right value, but an
-noname.5:
+; adjustment follows further down):
+;
-        tstl    r7
+; if (b' == 1)
-        beql    noname.6
+;   {
+;     q' = q''
-        ashl    r7,r2,12(ap)
+;     r' = a - q'*b
+;
-        ashl    r7,4(ap),r4
+; However, one can note the following relationship:
-        subl3   r7,#32,r3
+;
-        subl3   r3,#32,r2
+;                         r'' = a2 - q''*b2
-        extzv   r3,r2,8(ap),r2
+;                  =>   2*r'' = 2*a2 - 2*q''*b2
-        bisl3   r4,r2,4(ap)
+;                             = { a = 2*a2 + a', b = 2*b2 + b' = 2*b2 + 1,
+;                                 q' = q'' }
-        ashl    r7,8(ap),8(ap)
+;                             = a - a' - q'*(b - 1)
-noname.6:
+;                             = a - q'*b - a' + q'
+;                             = r' - a' + q'
-        bicl3   #65535,12(ap),r2
+;                  =>     r'  = 2*r'' - q' + a'
-        extzv   #16,#16,r2,r5
+;
+; This enables us to use r'' instead of discarding and calculating another
-        bicl3   #-65536,12(ap),r6
+; modulo:
+;
-noname.7:
+; if (b' == 1)
+;   {
-        moval   4(ap),r2
+;     q' = q''
-        movzwl  2(r2),r0
+;     r' = (r'' << 1) - q' + a'
-        cmpl    r0,r5
+;
-        bneq    noname.8
+; Now, all we have to do is adjust r', because it might be < 0:
+;
-        movzwl  #65535,r4
+;     while (r' < 0)
-        brb     noname.9
+;       {
-noname.8:
+;         r' = r' + b
+;         q' = q' - 1
-        clrl    r1
+;       }
-        movl    (r2),r0
+;   }
-        movl    r5,r2
+;
-        bgeq    vcg.1
+; return q'
-        cmpl    r2,r0
-        bgtru   vcg.2
-        incl    r1
-        brb     vcg.2
-        nop     
-vcg.1:
-        ediv    r2,r0,r1,r0
-vcg.2:
-        movl    r1,r4
-noname.9:
-noname.10:
-        mull3   r5,r4,r0
-        subl3   r0,4(ap),r3
-        bicl3   #65535,r3,r0
-        bneq    noname.13
-        mull3   r6,r4,r2
-        ashl    #16,r3,r1
-        bicl3   #65535,8(ap),r0
-        extzv   #16,#16,r0,r0
-        addl2   r0,r1
-        cmpl    r2,r1
-        bgtru   noname.12
-noname.11:
-        brb     noname.13
-        nop     
-noname.12:
-        decl    r4
-        brb     noname.10
-noname.13:
-        mull3   r5,r4,r1
-        mull3   r6,r4,r0
-        extzv   #16,#16,r0,r3
-        ashl    #16,r0,r2
-        bicl3   #65535,r2,r0
-        addl2   r3,r1
-        moval   8(ap),r3
-        cmpl    (r3),r0
-        bgequ   noname.15
-        incl    r1
-noname.15:
-        subl2   r0,(r3)
-        cmpl    4(ap),r1
-        bgequ   noname.16
-        addl2   12(ap),4(ap)
-        decl    r4
-noname.16:
-        subl2   r1,4(ap)
-        decl    r8
-        beql    noname.18
-noname.17:
-        ashl    #16,r4,r9
-        ashl    #16,4(ap),r2
+h=4 ;(AP)       h       by value (input)
-        movzwl  2(r3),r0
+l=8 ;(AP)       l       by value (input)
-        bisl2   r0,r2
+d=12 ;(AP)      d       by value (input)
-        bicl3   #0,r2,4(ap)
-        bicl3   #-65536,(r3),r0
+;aprim=r5
-        ashl    #16,r0,(r3)
+;a2=r6
-        brw     noname.7
+;a20=r6
-        nop     
+;a21=r7
-noname.18:
+;bprim=r8
+;b2=r9
+;qprim=r10      ; initially used as q''
+;rprim=r11      ; initially used as r''
-        bisl2   r4,r9
-        movl    r9,r10
+        .psect  code,nowrt
-noname.3:
+.entry  bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
+        movl    l(ap),r2
+        movl    h(ap),r3
+        movl    d(ap),r4
+        movl    #0,r5
+        movl    #0,r8
+        movl    #0,r0
+;       movl    #0,r1
+        rotl    #-1,r2,r6       ; a20 = l >> 1 (almost)
+        rotl    #-1,r3,r7       ; a21 = h >> 1 (almost)
+        rotl    #-1,r4,r9       ; b2 = d >> 1 (almost)
+        tstl    r6
+        bgeq    1$
+        xorl2   #^X80000000,r6  ; fixup a20 so highest bit is 0
+        incl    r5              ; a' = 1
+1$:
+        tstl    r7
+        bgeq    2$
+        xorl2   #^X80000000,r6  ; fixup a20 so highest bit is 1,
+                                ; since that's what was lowest in a21
+        xorl2   #^X80000000,r7  ; fixup a21 so highest bit is 0
+2$:
+        tstl    r9
+        beql    666$            ; Uh-oh, the divisor is 0...
+        bgtr    3$
+        xorl2   #^X80000000,r9  ; fixup b2 so highest bit is 0
+        incl    r8              ; b' = 1
+3$:
+        tstl    r9
+        bneq    4$              ; if b2 is 0, we know that b' is 1
+        tstl    r3
+        bneq    666$            ; if higher half isn't 0, we overflow
+        movl    r2,r10          ; otherwise, we have our result
+        brb     42$             ; This is a success, really.
+4$:
+        ediv    r9,r6,r10,r11
+        tstl    r8
+        bneq    5$              ; If b' != 0, go to the other part
+;       addl3   r11,r11,r1
+;       addl2   r5,r1
+        brb     42$
+5$:
+        ashl    #1,r11,r11
+        subl2   r10,r11
+        addl2   r5,r11
+        bgeq    7$
+6$:
+        decl    r10
+        addl2   r4,r11
+        blss    6$
+7$:
+;       movl    r11,r1
+42$:
        movl    r10,r0
-        ret     
+666$:
-        tstl    r0
+        ret
-        .psect  code,nowrt
+        .title  vax_bn_add_words  unsigned add of two arrays
+;
-.entry  BN_ADD_WORDS,^m<r2,r3,r4,r5,r6,r7>
+; Richard Levitte 20-Nov-2000
+;
-        tstl    16(ap)
+; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
-        bgtr    noname.21
+;       ULONG c = 0;
-        clrl    r7
+;       int i;
-        brw     noname.22
+;       for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
-noname.21:
+;       return(c);
+; }
-        clrl    r4
-        tstl    r0
-noname.23:
-        movl    8(ap),r6
-        addl3   r4,(r6),r2
-        bicl2   #0,r2
-        clrl    r0
-        cmpl    r2,r4
-        bgequ   vcg.3
-        incl    r0
-vcg.3:
-        movl    r0,r4
-        movl    12(ap),r5
-        addl3   (r5),r2,r1
-        bicl2   #0,r1
-        clrl    r0
-        cmpl    r1,r2
-        bgequ   vcg.4
-        incl    r0
-vcg.4:
-        addl2   r0,r4
-        movl    4(ap),r3
-        movl    r1,(r3)
-        decl    16(ap)
-        bgtr    gen.1
-        brw     noname.25
-gen.1:
-noname.24:
-        addl3   r4,4(r6),r2
-        bicl2   #0,r2
-        clrl    r0
-        cmpl    r2,r4
-        bgequ   vcg.5
-        incl    r0
-vcg.5:
-        movl    r0,r4
-        addl3   4(r5),r2,r1
-        bicl2   #0,r1
-        clrl    r0
-        cmpl    r1,r2
-        bgequ   vcg.6
-        incl    r0
-vcg.6:
-        addl2   r0,r4
-        movl    r1,4(r3)
-        decl    16(ap)
-        bleq    noname.25
-noname.26:
-        addl3   r4,8(r6),r2
-        bicl2   #0,r2
-        clrl    r0
-        cmpl    r2,r4
-        bgequ   vcg.7
-        incl    r0
-vcg.7:
-        movl    r0,r4
-        addl3   8(r5),r2,r1
-        bicl2   #0,r1
-        clrl    r0
-        cmpl    r1,r2
-        bgequ   vcg.8
-        incl    r0
-vcg.8:
-        addl2   r0,r4
-        movl    r1,8(r3)
-        decl    16(ap)
-        bleq    noname.25
-noname.27:
-        addl3   r4,12(r6),r2
-        bicl2   #0,r2
-        clrl    r0
-        cmpl    r2,r4
-        bgequ   vcg.9
-        incl    r0
-vcg.9:
-        movl    r0,r4
-        addl3   12(r5),r2,r1
-        bicl2   #0,r1
-        clrl    r0
-        cmpl    r1,r2
-        bgequ   vcg.10
-        incl    r0
-vcg.10:
-        addl2   r0,r4
-        movl    r1,12(r3)
+r=4 ;(AP)       r       by reference (output)
+a=8 ;(AP)       a       by reference (input)
+b=12 ;(AP)      b       by reference (input)
+n=16 ;(AP)      n       by value (input)
-        decl    16(ap)
-        bleq    noname.25
-noname.28:
-        addl3   #16,r6,8(ap)
+        .psect  code,nowrt
-        addl3   #16,r5,12(ap)
+.entry  bn_add_words,^m<r2,r3,r4,r5,r6>
-        addl3   #16,r3,4(ap)
+        moval   @r(ap),r2
-        brw     noname.23
+        moval   @a(ap),r3
-        tstl    r0
+        moval   @b(ap),r4
-noname.25:
+        movl    n(ap),r5        ; assumed >0 by C code
+        clrl    r0              ; c
-        movl    r4,r7
+        tstl    r5              ; carry = 0
+        bleq    666$
-noname.22:
+0$:
-        movl    r7,r0
+        movl    (r3)+,r6        ; carry untouched
-        ret     
+        adwc    (r4)+,r6        ; carry used and touched
-        nop     
+        movl    r6,(r2)+        ; carry untouched
+        sobgtr  r5,0$           ; carry untouched
+        adwc    #0,r0
+666$:
+        ret
+        .title  vax_bn_sub_words  unsigned subtract of two arrays
+;
+; Richard Levitte 20-Nov-2000
+;
+; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
+;       ULONG c = 0;
+;       int i;
+;       for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
+;       return(c);
+; }
-;r=4 ;(AP)
+r=4 ;(AP)       r       by reference (output)
-;a=8 ;(AP)
+a=8 ;(AP)       a       by reference (input)
-;b=12 ;(AP)
+b=12 ;(AP)      b       by reference (input)
-;n=16 ;(AP)     n       by value (input)
+n=16 ;(AP)      n       by value (input)
-        .psect  code,nowrt
-.entry  BN_SUB_WORDS,^m<r2,r3,r4,r5,r6,r7>
+        .psect  code,nowrt
-        clrl    r6
+.entry  bn_sub_words,^m<r2,r3,r4,r5,r6>
-        tstl    16(ap)
+        moval   @r(ap),r2
-        bgtr    noname.31
+        moval   @a(ap),r3
-        clrl    r7
+        moval   @b(ap),r4
-        brw     noname.32
+        movl    n(ap),r5        ; assumed >0 by C code
-        tstl    r0
+        clrl    r0              ; c
-noname.31:
-noname.33:
+        tstl    r5              ; carry = 0
+        bleq    666$
-        movl    8(ap),r5
+0$:
-        movl    (r5),r1
+        movl    (r3)+,r6        ; carry untouched
-        movl    12(ap),r4
+        sbwc    (r4)+,r6        ; carry used and touched
-        movl    (r4),r2
+        movl    r6,(r2)+        ; carry untouched
+        sobgtr  r5,0$           ; carry untouched
-        movl    4(ap),r3
-        subl3   r2,r1,r0
-        subl2   r6,r0
-        bicl3   #0,r0,(r3)
-        cmpl    r1,r2
-        beql    noname.34
-        clrl    r0
-        cmpl    r1,r2
-        bgequ   vcg.11
-        incl    r0
-vcg.11:
-        movl    r0,r6
-noname.34:
-        decl    16(ap)
-        bgtr    gen.2
-        brw     noname.36
-gen.2:
-noname.35:
-        movl    4(r5),r2
-        movl    4(r4),r1
-        subl3   r1,r2,r0
-        subl2   r6,r0
-        bicl3   #0,r0,4(r3)
-        cmpl    r2,r1
-        beql    noname.37
-        clrl    r0
-        cmpl    r2,r1
-        bgequ   vcg.12
-        incl    r0
-vcg.12:
-        movl    r0,r6
-noname.37:
-        decl    16(ap)
-        bleq    noname.36
-noname.38:
-        movl    8(r5),r1
-        movl    8(r4),r2
-        subl3   r2,r1,r0
-        subl2   r6,r0
-        bicl3   #0,r0,8(r3)
-        cmpl    r1,r2
-        beql    noname.39
-        clrl    r0
-        cmpl    r1,r2
-        bgequ   vcg.13
-        incl    r0
-vcg.13:
-        movl    r0,r6
-noname.39:
-        decl    16(ap)
-        bleq    noname.36
-noname.40:
-        movl    12(r5),r1
-        movl    12(r4),r2
-        subl3   r2,r1,r0
-        subl2   r6,r0
-        bicl3   #0,r0,12(r3)
-        cmpl    r1,r2
-        beql    noname.41
-        clrl    r0
-        cmpl    r1,r2
-        bgequ   vcg.14
-        incl    r0
-vcg.14:
-        movl    r0,r6
-noname.41:
-        decl    16(ap)
-        bleq    noname.36
-noname.42:
-        addl3   #16,r5,8(ap)
-        addl3   #16,r4,12(ap)
-        addl3   #16,r3,4(ap)
-        brw     noname.33
-        tstl    r0
-noname.36:
-        movl    r6,r7
-noname.32:
-        movl    r7,r0
-        ret     
-        nop     
+        adwc    #0,r0
+666$:
+        ret
 ;r=4 ;(AP)
@@ -6615,81 +6410,3 @@ noname.610:
 ; For now, the code below doesn't work, so I end this prematurely.
 .end
-        .title  vax_bn_div64    division 64/32=>32
-; 
-; r.l. 16-jan-1998
-;
-; unsigned int bn_div64(unsigned long h, unsigned long l, unsigned long d)
-;       return <h,l>/d;
-;
-        .psect  code,nowrt
-h=4 ;(AP)       by value (input)
-l=8 ;(AP)       by value (input)
-d=12 ;(AP)      by value (input)
-.entry  bn_div64,^m<r2,r3,r4,r5,r6,r7,r8,r9>
-        movl    l(ap),r2        ; l
-        movl    h(ap),r3        ; h
-        movl    d(ap),r4        ; d
-        clrl    r5              ; q
-        clrl    r6              ; r
-        ; Treat "negative" specially
-        tstl    r3
-        blss    30$
-        tstl    r4
-        beql    90$
-        ediv    r4,r2,r5,r6
-        bvs     666$
-        movl    r5,r0
-        ret
-30$:
-        ; The theory here is to do some harmless shifting and a little
-        ; bit of rounding (brackets are to designate when decimals are
-        ; cut off):
-        ;
-        ;       result = 2 * [ ([<h,0>/2] + [d/2]) / d ] + [ l / d ]
-        movl    #0,r7
-        movl    r3,r8           ; copy h
-        ashq    #-1,r7,r7       ; [<h,0>/2] => <r8,r7>
-        bicl2   #^X80000000,r8  ; Remove "sign"
-        movl    r4,r9           ; copy d
-        ashl    #-1,r9,r9       ; [d/2] => r9
-        bicl2   #^X80000000,r9  ; Remove "sign"
-        addl2   r9,r7
-        adwc    #0,r8           ; [<h,0>/2] + [d/2] => <r8,r7>
-        ediv    r4,r7,r5,r6     ; [ ([<h,0>/2] + [d/2]) / d ] => <r5,r6>
-        bvs     666$
-        movl    #0,r6
-        ashq    #1,r5,r5        ; 2 * [ ([<h,0>/2] + [d/2]) / d ] => r5
-        movl    #0,r3
-        ediv    r4,r2,r8,r9     ; [ l / d ] => <r8,r9>
-        addl2   r8,r5           ;
-        bcs     666$
-        movl    r5,r0
-        ret
-                
-90$:
-        movl    #-1,r0
-        ret
-666$:
-        
-.end
author	beck <>	2002-05-15 02:29:21 +0000
committer	beck <>	2002-05-15 02:29:21 +0000
commit	b64270d1e45fe7f3241e4c9b6ce60d5ac89bc2e9 (patch)
tree	fa27cf82a1250b64ed3bf5f4a18c7354d470bbcc /src/lib/libcrypto/bn/asm/vms.mar
parent	e471e1ea98d673597b182ea85f29e30c97cd08b5 (diff)
download	openbsd-b64270d1e45fe7f3241e4c9b6ce60d5ac89bc2e9.tar.gz openbsd-b64270d1e45fe7f3241e4c9b6ce60d5ac89bc2e9.tar.bz2 openbsd-b64270d1e45fe7f3241e4c9b6ce60d5ac89bc2e9.zip

diff --git a/src/lib/libcrypto/bn/asm/vms.mar b/src/lib/libcrypto/bn/asm/vms.mar index ac9d57d7b0..465f2774b6 100644 --- a/src/lib/libcrypto/bn/asm/vms.mar +++ b/src/lib/libcrypto/bn/asm/vms.mar
@@ -162,442 +162,237 @@ n=12 ;(AP) n by value (input)
162	movl #1,r0 ; return SS$_NORMAL	162	movl #1,r0 ; return SS$_NORMAL
163	ret	163	ret
164		164
165	.title (generated)	165	.title vax_bn_div_words unsigned divide
166		166	;
167	.psect code,nowrt	167	; Richard Levitte 20-Nov-2000
168		168	;
169	.entry BN_DIV_WORDS,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10>	169	; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
170	subl2 #4,sp	170	; {
171		171	; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
172	clrl r9	172	; }
173	movl #2,r8	173	;
174		174	; Using EDIV would be very easy, if it didn't do signed calculations.
175	tstl 12(ap)	175	; Therefore, some extra things have to happen around it. The way to
176	bneq noname.2	176	; handle that is to shift all operands right one step (basically dividing
177	mnegl #1,r10	177	; them by 2) and handle the different cases depending on what the lowest
178	brw noname.3	178	; bit of each operand was.
179	tstl r0	179	;
180	nop	180	; To start with, let's define the following:
181	noname.2:	181	;
182		182	; a' = l & 1
183	pushl 12(ap)	183	; a2 = <h,l> >> 1 # UNSIGNED shift!
184	calls #1,BN_NUM_BITS_WORD	184	; b' = d & 1
185	movl r0,r7	185	; b2 = d >> 1 # UNSIGNED shift!
186		186	;
187	cmpl r7,#32	187	; Now, use EDIV to calculate a quotient and a remainder:
188	beql noname.4	188	;
189	ashl r7,#1,r2	189	; q'' = a2/b2
190	cmpl 4(ap),r2	190	; r'' = a2 - q''*b2
191	blequ noname.4	191	;
192		192	; If b' is 0, the quotient is already correct, we just need to adjust the
193	pushl r7	193	; remainder:
194	calls #1,BN_DIV_WORDS_ABORT	194	;
195	noname.4:	195	; if (b' == 0)
196		196	; {
197	subl3 r7,#32,r7	197	; r = 2*r'' + a'
198		198	; q = q''
199	movl 12(ap),r2	199	; }
200	cmpl 4(ap),r2	200	;
201	blssu noname.5	201	; If b' is 1, we need to do other adjustments. The first thought is the
202	subl2 r2,4(ap)	202	; following (note that r' will not always have the right value, but an
203	noname.5:	203	; adjustment follows further down):
204		204	;
205	tstl r7	205	; if (b' == 1)
206	beql noname.6	206	; {
207		207	; q' = q''
208	ashl r7,r2,12(ap)	208	; r' = a - q'*b
209		209	;
210	ashl r7,4(ap),r4	210	; However, one can note the following relationship:
211	subl3 r7,#32,r3	211	;
212	subl3 r3,#32,r2	212	; r'' = a2 - q''*b2
213	extzv r3,r2,8(ap),r2	213	; => 2*r'' = 2*a2 - 2*q''*b2
214	bisl3 r4,r2,4(ap)	214	; = { a = 2*a2 + a', b = 2*b2 + b' = 2*b2 + 1,
215		215	; q' = q'' }
216	ashl r7,8(ap),8(ap)	216	; = a - a' - q'*(b - 1)
217	noname.6:	217	; = a - q'*b - a' + q'
218		218	; = r' - a' + q'
219	bicl3 #65535,12(ap),r2	219	; => r' = 2*r'' - q' + a'
220	extzv #16,#16,r2,r5	220	;
221		221	; This enables us to use r'' instead of discarding and calculating another
222	bicl3 #-65536,12(ap),r6	222	; modulo:
223		223	;
224	noname.7:	224	; if (b' == 1)
225		225	; {
226	moval 4(ap),r2	226	; q' = q''
227	movzwl 2(r2),r0	227	; r' = (r'' << 1) - q' + a'
228	cmpl r0,r5	228	;
229	bneq noname.8	229	; Now, all we have to do is adjust r', because it might be < 0:
230		230	;
231	movzwl #65535,r4	231	; while (r' < 0)
232	brb noname.9	232	; {
233	noname.8:	233	; r' = r' + b
234		234	; q' = q' - 1
235	clrl r1	235	; }
236	movl (r2),r0	236	; }
237	movl r5,r2	237	;
238	bgeq vcg.1	238	; return q'
239	cmpl r2,r0
240	bgtru vcg.2
241	incl r1
242	brb vcg.2
243	nop
244	vcg.1:
245	ediv r2,r0,r1,r0
246	vcg.2:
247	movl r1,r4
248	noname.9:
249
250	noname.10:
251
252	mull3 r5,r4,r0
253	subl3 r0,4(ap),r3
254
255	bicl3 #65535,r3,r0
256	bneq noname.13
257	mull3 r6,r4,r2
258	ashl #16,r3,r1
259	bicl3 #65535,8(ap),r0
260	extzv #16,#16,r0,r0
261	addl2 r0,r1
262	cmpl r2,r1
263	bgtru noname.12
264	noname.11:
265
266	brb noname.13
267	nop
268	noname.12:
269
270	decl r4
271	brb noname.10
272	noname.13:
273
274	mull3 r5,r4,r1
275
276	mull3 r6,r4,r0
277
278	extzv #16,#16,r0,r3
279
280	ashl #16,r0,r2
281	bicl3 #65535,r2,r0
282
283	addl2 r3,r1
284
285	moval 8(ap),r3
286	cmpl (r3),r0
287	bgequ noname.15
288	incl r1
289	noname.15:
290
291	subl2 r0,(r3)
292
293	cmpl 4(ap),r1
294	bgequ noname.16
295
296	addl2 12(ap),4(ap)
297
298	decl r4
299	noname.16:
300
301	subl2 r1,4(ap)
302
303	decl r8
304	beql noname.18
305	noname.17:
306
307	ashl #16,r4,r9
308		239
309	ashl #16,4(ap),r2	240	h=4 ;(AP) h by value (input)
310	movzwl 2(r3),r0	241	l=8 ;(AP) l by value (input)
311	bisl2 r0,r2	242	d=12 ;(AP) d by value (input)
312	bicl3 #0,r2,4(ap)
313		243
314	bicl3 #-65536,(r3),r0	244	;aprim=r5
315	ashl #16,r0,(r3)	245	;a2=r6
316	brw noname.7	246	;a20=r6
317	nop	247	;a21=r7
318	noname.18:	248	;bprim=r8
		249	;b2=r9
		250	;qprim=r10 ; initially used as q''
		251	;rprim=r11 ; initially used as r''
319		252
320	bisl2 r4,r9
321		253
322	movl r9,r10	254	.psect code,nowrt
323		255
324	noname.3:	256	.entry bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
		257	movl l(ap),r2
		258	movl h(ap),r3
		259	movl d(ap),r4
		260
		261	movl #0,r5
		262	movl #0,r8
		263	movl #0,r0
		264	; movl #0,r1
		265
		266	rotl #-1,r2,r6 ; a20 = l >> 1 (almost)
		267	rotl #-1,r3,r7 ; a21 = h >> 1 (almost)
		268	rotl #-1,r4,r9 ; b2 = d >> 1 (almost)
		269
		270	tstl r6
		271	bgeq 1$
		272	xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 0
		273	incl r5 ; a' = 1
		274	1$:
		275	tstl r7
		276	bgeq 2$
		277	xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 1,
		278	; since that's what was lowest in a21
		279	xorl2 #^X80000000,r7 ; fixup a21 so highest bit is 0
		280	2$:
		281	tstl r9
		282	beql 666$ ; Uh-oh, the divisor is 0...
		283	bgtr 3$
		284	xorl2 #^X80000000,r9 ; fixup b2 so highest bit is 0
		285	incl r8 ; b' = 1
		286	3$:
		287	tstl r9
		288	bneq 4$ ; if b2 is 0, we know that b' is 1
		289	tstl r3
		290	bneq 666$ ; if higher half isn't 0, we overflow
		291	movl r2,r10 ; otherwise, we have our result
		292	brb 42$ ; This is a success, really.
		293	4$:
		294	ediv r9,r6,r10,r11
		295
		296	tstl r8
		297	bneq 5$ ; If b' != 0, go to the other part
		298	; addl3 r11,r11,r1
		299	; addl2 r5,r1
		300	brb 42$
		301	5$:
		302	ashl #1,r11,r11
		303	subl2 r10,r11
		304	addl2 r5,r11
		305	bgeq 7$
		306	6$:
		307	decl r10
		308	addl2 r4,r11
		309	blss 6$
		310	7$:
		311	; movl r11,r1
		312	42$:
325	movl r10,r0	313	movl r10,r0
326	ret	314	666$:
327	tstl r0	315	ret
328
329		316
330	.psect code,nowrt	317	.title vax_bn_add_words unsigned add of two arrays
331		318	;
332	.entry BN_ADD_WORDS,^m<r2,r3,r4,r5,r6,r7>	319	; Richard Levitte 20-Nov-2000
333		320	;
334	tstl 16(ap)	321	; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
335	bgtr noname.21	322	; ULONG c = 0;
336	clrl r7	323	; int i;
337	brw noname.22	324	; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
338	noname.21:	325	; return(c);
339		326	; }
340	clrl r4
341
342	tstl r0
343	noname.23:
344
345	movl 8(ap),r6
346	addl3 r4,(r6),r2
347
348	bicl2 #0,r2
349
350	clrl r0
351	cmpl r2,r4
352	bgequ vcg.3
353	incl r0
354	vcg.3:
355	movl r0,r4
356
357	movl 12(ap),r5
358	addl3 (r5),r2,r1
359	bicl2 #0,r1
360
361	clrl r0
362	cmpl r1,r2
363	bgequ vcg.4
364	incl r0
365	vcg.4:
366	addl2 r0,r4
367
368	movl 4(ap),r3
369	movl r1,(r3)
370
371	decl 16(ap)
372	bgtr gen.1
373	brw noname.25
374	gen.1:
375	noname.24:
376
377	addl3 r4,4(r6),r2
378
379	bicl2 #0,r2
380
381	clrl r0
382	cmpl r2,r4
383	bgequ vcg.5
384	incl r0
385	vcg.5:
386	movl r0,r4
387
388	addl3 4(r5),r2,r1
389	bicl2 #0,r1
390
391	clrl r0
392	cmpl r1,r2
393	bgequ vcg.6
394	incl r0
395	vcg.6:
396	addl2 r0,r4
397
398	movl r1,4(r3)
399
400	decl 16(ap)
401	bleq noname.25
402	noname.26:
403
404	addl3 r4,8(r6),r2
405
406	bicl2 #0,r2
407
408	clrl r0
409	cmpl r2,r4
410	bgequ vcg.7
411	incl r0
412	vcg.7:
413	movl r0,r4
414
415	addl3 8(r5),r2,r1
416	bicl2 #0,r1
417
418	clrl r0
419	cmpl r1,r2
420	bgequ vcg.8
421	incl r0
422	vcg.8:
423	addl2 r0,r4
424
425	movl r1,8(r3)
426
427	decl 16(ap)
428	bleq noname.25
429	noname.27:
430
431	addl3 r4,12(r6),r2
432
433	bicl2 #0,r2
434
435	clrl r0
436	cmpl r2,r4
437	bgequ vcg.9
438	incl r0
439	vcg.9:
440	movl r0,r4
441
442	addl3 12(r5),r2,r1
443	bicl2 #0,r1
444
445	clrl r0
446	cmpl r1,r2
447	bgequ vcg.10
448	incl r0
449	vcg.10:
450	addl2 r0,r4
451		327
452	movl r1,12(r3)	328	r=4 ;(AP) r by reference (output)
		329	a=8 ;(AP) a by reference (input)
		330	b=12 ;(AP) b by reference (input)
		331	n=16 ;(AP) n by value (input)
453		332
454	decl 16(ap)
455	bleq noname.25
456	noname.28:
457		333
458	addl3 #16,r6,8(ap)	334	.psect code,nowrt
459		335
460	addl3 #16,r5,12(ap)	336	.entry bn_add_words,^m<r2,r3,r4,r5,r6>
461		337
462	addl3 #16,r3,4(ap)	338	moval @r(ap),r2
463	brw noname.23	339	moval @a(ap),r3
464	tstl r0	340	moval @b(ap),r4
465	noname.25:	341	movl n(ap),r5 ; assumed >0 by C code
		342	clrl r0 ; c
466		343
467	movl r4,r7	344	tstl r5 ; carry = 0
		345	bleq 666$
468		346
469	noname.22:	347	0$:
470	movl r7,r0	348	movl (r3)+,r6 ; carry untouched
471	ret	349	adwc (r4)+,r6 ; carry used and touched
472	nop	350	movl r6,(r2)+ ; carry untouched
		351	sobgtr r5,0$ ; carry untouched
473		352
		353	adwc #0,r0
		354	666$:
		355	ret
474		356
		357	.title vax_bn_sub_words unsigned subtract of two arrays
		358	;
		359	; Richard Levitte 20-Nov-2000
		360	;
		361	; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
		362	; ULONG c = 0;
		363	; int i;
		364	; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
		365	; return(c);
		366	; }
475		367
476	;r=4 ;(AP)	368	r=4 ;(AP) r by reference (output)
477	;a=8 ;(AP)	369	a=8 ;(AP) a by reference (input)
478	;b=12 ;(AP)	370	b=12 ;(AP) b by reference (input)
479	;n=16 ;(AP) n by value (input)	371	n=16 ;(AP) n by value (input)
480		372
481	.psect code,nowrt
482		373
483	.entry BN_SUB_WORDS,^m<r2,r3,r4,r5,r6,r7>	374	.psect code,nowrt
484		375
485	clrl r6	376	.entry bn_sub_words,^m<r2,r3,r4,r5,r6>
486		377
487	tstl 16(ap)	378	moval @r(ap),r2
488	bgtr noname.31	379	moval @a(ap),r3
489	clrl r7	380	moval @b(ap),r4
490	brw noname.32	381	movl n(ap),r5 ; assumed >0 by C code
491	tstl r0	382	clrl r0 ; c
492	noname.31:
493		383
494	noname.33:	384	tstl r5 ; carry = 0
		385	bleq 666$
495		386
496	movl 8(ap),r5	387	0$:
497	movl (r5),r1	388	movl (r3)+,r6 ; carry untouched
498	movl 12(ap),r4	389	sbwc (r4)+,r6 ; carry used and touched
499	movl (r4),r2	390	movl r6,(r2)+ ; carry untouched
500		391	sobgtr r5,0$ ; carry untouched
501	movl 4(ap),r3
502	subl3 r2,r1,r0
503	subl2 r6,r0
504	bicl3 #0,r0,(r3)
505
506	cmpl r1,r2
507	beql noname.34
508	clrl r0
509	cmpl r1,r2
510	bgequ vcg.11
511	incl r0
512	vcg.11:
513	movl r0,r6
514	noname.34:
515
516	decl 16(ap)
517	bgtr gen.2
518	brw noname.36
519	gen.2:
520	noname.35:
521
522	movl 4(r5),r2
523	movl 4(r4),r1
524
525	subl3 r1,r2,r0
526	subl2 r6,r0
527	bicl3 #0,r0,4(r3)
528
529	cmpl r2,r1
530	beql noname.37
531	clrl r0
532	cmpl r2,r1
533	bgequ vcg.12
534	incl r0
535	vcg.12:
536	movl r0,r6
537	noname.37:
538
539	decl 16(ap)
540	bleq noname.36
541	noname.38:
542
543	movl 8(r5),r1
544	movl 8(r4),r2
545
546	subl3 r2,r1,r0
547	subl2 r6,r0
548	bicl3 #0,r0,8(r3)
549
550	cmpl r1,r2
551	beql noname.39
552	clrl r0
553	cmpl r1,r2
554	bgequ vcg.13
555	incl r0
556	vcg.13:
557	movl r0,r6
558	noname.39:
559
560	decl 16(ap)
561	bleq noname.36
562	noname.40:
563
564	movl 12(r5),r1
565	movl 12(r4),r2
566
567	subl3 r2,r1,r0
568	subl2 r6,r0
569	bicl3 #0,r0,12(r3)
570
571	cmpl r1,r2
572	beql noname.41
573	clrl r0
574	cmpl r1,r2
575	bgequ vcg.14
576	incl r0
577	vcg.14:
578	movl r0,r6
579	noname.41:
580
581	decl 16(ap)
582	bleq noname.36
583	noname.42:
584
585	addl3 #16,r5,8(ap)
586
587	addl3 #16,r4,12(ap)
588
589	addl3 #16,r3,4(ap)
590	brw noname.33
591	tstl r0
592	noname.36:
593
594	movl r6,r7
595
596	noname.32:
597	movl r7,r0
598	ret
599	nop
600		392
		393	adwc #0,r0
		394	666$:
		395	ret
601		396
602		397
603	;r=4 ;(AP)	398	;r=4 ;(AP)
@@ -6615,81 +6410,3 @@ noname.610:
6615		6410
6616	; For now, the code below doesn't work, so I end this prematurely.	6411	; For now, the code below doesn't work, so I end this prematurely.
6617	.end	6412	.end
6618
6619	.title vax_bn_div64 division 64/32=>32
6620	;
6621	; r.l. 16-jan-1998
6622	;
6623	; unsigned int bn_div64(unsigned long h, unsigned long l, unsigned long d)
6624	; return <h,l>/d;
6625	;
6626
6627	.psect code,nowrt
6628
6629	h=4 ;(AP) by value (input)
6630	l=8 ;(AP) by value (input)
6631	d=12 ;(AP) by value (input)
6632
6633	.entry bn_div64,^m<r2,r3,r4,r5,r6,r7,r8,r9>
6634
6635	movl l(ap),r2 ; l
6636	movl h(ap),r3 ; h
6637	movl d(ap),r4 ; d
6638	clrl r5 ; q
6639	clrl r6 ; r
6640
6641	; Treat "negative" specially
6642	tstl r3
6643	blss 30$
6644
6645	tstl r4
6646	beql 90$
6647
6648	ediv r4,r2,r5,r6
6649	bvs 666$
6650
6651	movl r5,r0
6652	ret
6653
6654	30$:
6655	; The theory here is to do some harmless shifting and a little
6656	; bit of rounding (brackets are to designate when decimals are
6657	; cut off):
6658	;
6659	; result = 2 * [ ([<h,0>/2] + [d/2]) / d ] + [ l / d ]
6660
6661	movl #0,r7
6662	movl r3,r8 ; copy h
6663	ashq #-1,r7,r7 ; [<h,0>/2] => <r8,r7>
6664	bicl2 #^X80000000,r8 ; Remove "sign"
6665
6666	movl r4,r9 ; copy d
6667	ashl #-1,r9,r9 ; [d/2] => r9
6668	bicl2 #^X80000000,r9 ; Remove "sign"
6669
6670	addl2 r9,r7
6671	adwc #0,r8 ; [<h,0>/2] + [d/2] => <r8,r7>
6672
6673	ediv r4,r7,r5,r6 ; [ ([<h,0>/2] + [d/2]) / d ] => <r5,r6>
6674	bvs 666$
6675
6676	movl #0,r6
6677	ashq #1,r5,r5 ; 2 * [ ([<h,0>/2] + [d/2]) / d ] => r5
6678
6679	movl #0,r3
6680	ediv r4,r2,r8,r9 ; [ l / d ] => <r8,r9>
6681
6682	addl2 r8,r5 ;
6683	bcs 666$
6684
6685	movl r5,r0
6686	ret
6687
6688	90$:
6689	movl #-1,r0
6690	ret
6691
6692	666$:
6693
6694
6695	.end