diff options
| author | beck <> | 1999-09-29 05:53:45 +0000 |
|---|---|---|
| committer | beck <> | 1999-09-29 05:53:45 +0000 |
| commit | 648e4f0876a3773381cbfff3192dd84dd1c8c925 (patch) | |
| tree | bd9d01e3969ffa5aac92128af3e515520c88fc0e /src/lib/libcrypto/bn/asm | |
| parent | 756086c41b0487beefc3d5b3400f80095d0e4157 (diff) | |
| download | openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.tar.gz openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.tar.bz2 openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.zip | |
new files for OpenSSL 0.9.4
Diffstat (limited to 'src/lib/libcrypto/bn/asm')
30 files changed, 3561 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/add.pl b/src/lib/libcrypto/bn/asm/alpha.works/add.pl new file mode 100644 index 0000000000..4dc76e6b69 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/add.pl | |||
| @@ -0,0 +1,119 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_add_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | $count=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | &br(&label("finish")); | ||
| 23 | &blt($count,&label("finish")); | ||
| 24 | |||
| 25 | ($a0,$b0)=&NR(2); | ||
| 26 | &ld($a0,&QWPw(0,$ap)); | ||
| 27 | &ld($b0,&QWPw(0,$bp)); | ||
| 28 | |||
| 29 | ########################################################## | ||
| 30 | &set_label("loop"); | ||
| 31 | |||
| 32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 38 | |||
| 39 | ($o0,$t0)=&NR(2); | ||
| 40 | &add($a0,$b0,$o0); | ||
| 41 | &cmpult($o0,$b0,$t0); | ||
| 42 | &add($o0,$cc,$o0); | ||
| 43 | &cmpult($o0,$cc,$cc); | ||
| 44 | &add($cc,$t0,$cc); &FR($t0); | ||
| 45 | |||
| 46 | ($t1,$o1)=&NR(2); | ||
| 47 | |||
| 48 | &add($a1,$b1,$o1); &FR($a1); | ||
| 49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 50 | &add($o1,$cc,$o1); | ||
| 51 | &cmpult($o1,$cc,$cc); | ||
| 52 | &add($cc,$t1,$cc); &FR($t1); | ||
| 53 | |||
| 54 | ($t2,$o2)=&NR(2); | ||
| 55 | |||
| 56 | &add($a2,$b2,$o2); &FR($a2); | ||
| 57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 58 | &add($o2,$cc,$o2); | ||
| 59 | &cmpult($o2,$cc,$cc); | ||
| 60 | &add($cc,$t2,$cc); &FR($t2); | ||
| 61 | |||
| 62 | ($t3,$o3)=&NR(2); | ||
| 63 | |||
| 64 | &add($a3,$b3,$o3); &FR($a3); | ||
| 65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 66 | &add($o3,$cc,$o3); | ||
| 67 | &cmpult($o3,$cc,$cc); | ||
| 68 | &add($cc,$t3,$cc); &FR($t3); | ||
| 69 | |||
| 70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 71 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 72 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 73 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 74 | |||
| 75 | &sub($count,4,$count); # count-=4 | ||
| 76 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 77 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 78 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 79 | |||
| 80 | &blt($count,&label("finish")); | ||
| 81 | &ld($a0,&QWPw(0,$ap)); | ||
| 82 | &ld($b0,&QWPw(0,$bp)); | ||
| 83 | &br(&label("loop")); | ||
| 84 | ################################################## | ||
| 85 | # Do the last 0..3 words | ||
| 86 | |||
| 87 | ($t0,$o0)=&NR(2); | ||
| 88 | &set_label("last_loop"); | ||
| 89 | |||
| 90 | &ld($a0,&QWPw(0,$ap)); # get a | ||
| 91 | &ld($b0,&QWPw(0,$bp)); # get b | ||
| 92 | |||
| 93 | &add($a0,$b0,$o0); | ||
| 94 | &cmpult($o0,$b0,$t0); # will we borrow? | ||
| 95 | &add($o0,$cc,$o0); # will we borrow? | ||
| 96 | &cmpult($o0,$cc,$cc); # will we borrow? | ||
| 97 | &add($cc,$t0,$cc); # add the borrows | ||
| 98 | &st($o0,&QWPw(0,$rp)); # save | ||
| 99 | |||
| 100 | &add($ap,$QWS,$ap); | ||
| 101 | &add($bp,$QWS,$bp); | ||
| 102 | &add($rp,$QWS,$rp); | ||
| 103 | &sub($count,1,$count); | ||
| 104 | &bgt($count,&label("last_loop")); | ||
| 105 | &function_end_A($name); | ||
| 106 | |||
| 107 | ###################################################### | ||
| 108 | &set_label("finish"); | ||
| 109 | &add($count,4,$count); | ||
| 110 | &bgt($count,&label("last_loop")); | ||
| 111 | |||
| 112 | &FR($o0,$t0,$a0,$b0); | ||
| 113 | &set_label("end"); | ||
| 114 | &function_end($name); | ||
| 115 | |||
| 116 | &fin_pool; | ||
| 117 | } | ||
| 118 | |||
| 119 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/div.pl b/src/lib/libcrypto/bn/asm/alpha.works/div.pl new file mode 100644 index 0000000000..7ec144377f --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/div.pl | |||
| @@ -0,0 +1,144 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | |||
| 3 | sub bn_div64 | ||
| 4 | { | ||
| 5 | local($data)=<<'EOF'; | ||
| 6 | # | ||
| 7 | # What follows was taken directly from the C compiler with a few | ||
| 8 | # hacks to redo the lables. | ||
| 9 | # | ||
| 10 | .text | ||
| 11 | .set noreorder | ||
| 12 | .set volatile | ||
| 13 | .align 3 | ||
| 14 | .globl bn_div64 | ||
| 15 | .ent bn_div64 | ||
| 16 | bn_div64: | ||
| 17 | ldgp $29,0($27) | ||
| 18 | bn_div64..ng: | ||
| 19 | lda $30,-48($30) | ||
| 20 | .frame $30,48,$26,0 | ||
| 21 | stq $26,0($30) | ||
| 22 | stq $9,8($30) | ||
| 23 | stq $10,16($30) | ||
| 24 | stq $11,24($30) | ||
| 25 | stq $12,32($30) | ||
| 26 | stq $13,40($30) | ||
| 27 | .mask 0x4003e00,-48 | ||
| 28 | .prologue 1 | ||
| 29 | bis $16,$16,$9 | ||
| 30 | bis $17,$17,$10 | ||
| 31 | bis $18,$18,$11 | ||
| 32 | bis $31,$31,$13 | ||
| 33 | bis $31,2,$12 | ||
| 34 | bne $11,$9119 | ||
| 35 | lda $0,-1 | ||
| 36 | br $31,$9136 | ||
| 37 | .align 4 | ||
| 38 | $9119: | ||
| 39 | bis $11,$11,$16 | ||
| 40 | jsr $26,BN_num_bits_word | ||
| 41 | ldgp $29,0($26) | ||
| 42 | subq $0,64,$1 | ||
| 43 | beq $1,$9120 | ||
| 44 | bis $31,1,$1 | ||
| 45 | sll $1,$0,$1 | ||
| 46 | cmpule $9,$1,$1 | ||
| 47 | bne $1,$9120 | ||
| 48 | # lda $16,_IO_stderr_ | ||
| 49 | # lda $17,$C32 | ||
| 50 | # bis $0,$0,$18 | ||
| 51 | # jsr $26,fprintf | ||
| 52 | # ldgp $29,0($26) | ||
| 53 | jsr $26,abort | ||
| 54 | ldgp $29,0($26) | ||
| 55 | .align 4 | ||
| 56 | $9120: | ||
| 57 | bis $31,64,$3 | ||
| 58 | cmpult $9,$11,$2 | ||
| 59 | subq $3,$0,$1 | ||
| 60 | addl $1,$31,$0 | ||
| 61 | subq $9,$11,$1 | ||
| 62 | cmoveq $2,$1,$9 | ||
| 63 | beq $0,$9122 | ||
| 64 | zapnot $0,15,$2 | ||
| 65 | subq $3,$0,$1 | ||
| 66 | sll $11,$2,$11 | ||
| 67 | sll $9,$2,$3 | ||
| 68 | srl $10,$1,$1 | ||
| 69 | sll $10,$2,$10 | ||
| 70 | bis $3,$1,$9 | ||
| 71 | $9122: | ||
| 72 | srl $11,32,$5 | ||
| 73 | zapnot $11,15,$6 | ||
| 74 | lda $7,-1 | ||
| 75 | .align 5 | ||
| 76 | $9123: | ||
| 77 | srl $9,32,$1 | ||
| 78 | subq $1,$5,$1 | ||
| 79 | bne $1,$9126 | ||
| 80 | zapnot $7,15,$27 | ||
| 81 | br $31,$9127 | ||
| 82 | .align 4 | ||
| 83 | $9126: | ||
| 84 | bis $9,$9,$24 | ||
| 85 | bis $5,$5,$25 | ||
| 86 | divqu $24,$25,$27 | ||
| 87 | $9127: | ||
| 88 | srl $10,32,$4 | ||
| 89 | .align 5 | ||
| 90 | $9128: | ||
| 91 | mulq $27,$5,$1 | ||
| 92 | subq $9,$1,$3 | ||
| 93 | zapnot $3,240,$1 | ||
| 94 | bne $1,$9129 | ||
| 95 | mulq $6,$27,$2 | ||
| 96 | sll $3,32,$1 | ||
| 97 | addq $1,$4,$1 | ||
| 98 | cmpule $2,$1,$2 | ||
| 99 | bne $2,$9129 | ||
| 100 | subq $27,1,$27 | ||
| 101 | br $31,$9128 | ||
| 102 | .align 4 | ||
| 103 | $9129: | ||
| 104 | mulq $27,$6,$1 | ||
| 105 | mulq $27,$5,$4 | ||
| 106 | srl $1,32,$3 | ||
| 107 | sll $1,32,$1 | ||
| 108 | addq $4,$3,$4 | ||
| 109 | cmpult $10,$1,$2 | ||
| 110 | subq $10,$1,$10 | ||
| 111 | addq $2,$4,$2 | ||
| 112 | cmpult $9,$2,$1 | ||
| 113 | bis $2,$2,$4 | ||
| 114 | beq $1,$9134 | ||
| 115 | addq $9,$11,$9 | ||
| 116 | subq $27,1,$27 | ||
| 117 | $9134: | ||
| 118 | subl $12,1,$12 | ||
| 119 | subq $9,$4,$9 | ||
| 120 | beq $12,$9124 | ||
| 121 | sll $27,32,$13 | ||
| 122 | sll $9,32,$2 | ||
| 123 | srl $10,32,$1 | ||
| 124 | sll $10,32,$10 | ||
| 125 | bis $2,$1,$9 | ||
| 126 | br $31,$9123 | ||
| 127 | .align 4 | ||
| 128 | $9124: | ||
| 129 | bis $13,$27,$0 | ||
| 130 | $9136: | ||
| 131 | ldq $26,0($30) | ||
| 132 | ldq $9,8($30) | ||
| 133 | ldq $10,16($30) | ||
| 134 | ldq $11,24($30) | ||
| 135 | ldq $12,32($30) | ||
| 136 | ldq $13,40($30) | ||
| 137 | addq $30,48,$30 | ||
| 138 | ret $31,($26),1 | ||
| 139 | .end bn_div64 | ||
| 140 | EOF | ||
| 141 | &asm_add($data); | ||
| 142 | } | ||
| 143 | |||
| 144 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl new file mode 100644 index 0000000000..b182bae452 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_mul_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | $word=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | &br(&label("finish")); | ||
| 23 | &blt($count,&label("finish")); | ||
| 24 | |||
| 25 | ($a0,$r0)=&NR(2); | ||
| 26 | &ld($a0,&QWPw(0,$ap)); | ||
| 27 | &ld($r0,&QWPw(0,$rp)); | ||
| 28 | |||
| 29 | $a=<<'EOF'; | ||
| 30 | ########################################################## | ||
| 31 | &set_label("loop"); | ||
| 32 | |||
| 33 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 34 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 35 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 36 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 37 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 38 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 39 | |||
| 40 | ($o0,$t0)=&NR(2); | ||
| 41 | &add($a0,$b0,$o0); | ||
| 42 | &cmpult($o0,$b0,$t0); | ||
| 43 | &add($o0,$cc,$o0); | ||
| 44 | &cmpult($o0,$cc,$cc); | ||
| 45 | &add($cc,$t0,$cc); &FR($t0); | ||
| 46 | |||
| 47 | ($t1,$o1)=&NR(2); | ||
| 48 | |||
| 49 | &add($a1,$b1,$o1); &FR($a1); | ||
| 50 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 51 | &add($o1,$cc,$o1); | ||
| 52 | &cmpult($o1,$cc,$cc); | ||
| 53 | &add($cc,$t1,$cc); &FR($t1); | ||
| 54 | |||
| 55 | ($t2,$o2)=&NR(2); | ||
| 56 | |||
| 57 | &add($a2,$b2,$o2); &FR($a2); | ||
| 58 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 59 | &add($o2,$cc,$o2); | ||
| 60 | &cmpult($o2,$cc,$cc); | ||
| 61 | &add($cc,$t2,$cc); &FR($t2); | ||
| 62 | |||
| 63 | ($t3,$o3)=&NR(2); | ||
| 64 | |||
| 65 | &add($a3,$b3,$o3); &FR($a3); | ||
| 66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 67 | &add($o3,$cc,$o3); | ||
| 68 | &cmpult($o3,$cc,$cc); | ||
| 69 | &add($cc,$t3,$cc); &FR($t3); | ||
| 70 | |||
| 71 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 72 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 73 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 74 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 75 | |||
| 76 | &sub($count,4,$count); # count-=4 | ||
| 77 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 78 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 79 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 80 | |||
| 81 | &blt($count,&label("finish")); | ||
| 82 | &ld($a0,&QWPw(0,$ap)); | ||
| 83 | &ld($b0,&QWPw(0,$bp)); | ||
| 84 | &br(&label("loop")); | ||
| 85 | EOF | ||
| 86 | ################################################## | ||
| 87 | # Do the last 0..3 words | ||
| 88 | |||
| 89 | &set_label("last_loop"); | ||
| 90 | |||
| 91 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 92 | &mul($a0,$word,($l0)=&NR(1)); | ||
| 93 | &add($ap,$QWS,$ap); | ||
| 94 | &muh($a0,$word,($h0)=&NR(1)); &FR($a0); | ||
| 95 | &add($l0,$cc,$l0); | ||
| 96 | &add($rp,$QWS,$rp); | ||
| 97 | &sub($count,1,$count); | ||
| 98 | &cmpult($l0,$cc,$cc); | ||
| 99 | &st($l0,&QWPw(-1,$rp)); &FR($l0); | ||
| 100 | &add($h0,$cc,$cc); &FR($h0); | ||
| 101 | |||
| 102 | &bgt($count,&label("last_loop")); | ||
| 103 | &function_end_A($name); | ||
| 104 | |||
| 105 | ###################################################### | ||
| 106 | &set_label("finish"); | ||
| 107 | &add($count,4,$count); | ||
| 108 | &bgt($count,&label("last_loop")); | ||
| 109 | |||
| 110 | &set_label("end"); | ||
| 111 | &function_end($name); | ||
| 112 | |||
| 113 | &fin_pool; | ||
| 114 | } | ||
| 115 | |||
| 116 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl new file mode 100644 index 0000000000..e37f6315fb --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_mul_add_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | $word=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | &br(&label("finish")); | ||
| 23 | &blt($count,&label("finish")); | ||
| 24 | |||
| 25 | ($a0,$r0)=&NR(2); | ||
| 26 | &ld($a0,&QWPw(0,$ap)); | ||
| 27 | &ld($r0,&QWPw(0,$rp)); | ||
| 28 | |||
| 29 | $a=<<'EOF'; | ||
| 30 | ########################################################## | ||
| 31 | &set_label("loop"); | ||
| 32 | |||
| 33 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 34 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 35 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 36 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 37 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 38 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 39 | |||
| 40 | ($o0,$t0)=&NR(2); | ||
| 41 | &add($a0,$b0,$o0); | ||
| 42 | &cmpult($o0,$b0,$t0); | ||
| 43 | &add($o0,$cc,$o0); | ||
| 44 | &cmpult($o0,$cc,$cc); | ||
| 45 | &add($cc,$t0,$cc); &FR($t0); | ||
| 46 | |||
| 47 | ($t1,$o1)=&NR(2); | ||
| 48 | |||
| 49 | &add($a1,$b1,$o1); &FR($a1); | ||
| 50 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 51 | &add($o1,$cc,$o1); | ||
| 52 | &cmpult($o1,$cc,$cc); | ||
| 53 | &add($cc,$t1,$cc); &FR($t1); | ||
| 54 | |||
| 55 | ($t2,$o2)=&NR(2); | ||
| 56 | |||
| 57 | &add($a2,$b2,$o2); &FR($a2); | ||
| 58 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 59 | &add($o2,$cc,$o2); | ||
| 60 | &cmpult($o2,$cc,$cc); | ||
| 61 | &add($cc,$t2,$cc); &FR($t2); | ||
| 62 | |||
| 63 | ($t3,$o3)=&NR(2); | ||
| 64 | |||
| 65 | &add($a3,$b3,$o3); &FR($a3); | ||
| 66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 67 | &add($o3,$cc,$o3); | ||
| 68 | &cmpult($o3,$cc,$cc); | ||
| 69 | &add($cc,$t3,$cc); &FR($t3); | ||
| 70 | |||
| 71 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 72 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 73 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 74 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 75 | |||
| 76 | &sub($count,4,$count); # count-=4 | ||
| 77 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 78 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 79 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 80 | |||
| 81 | &blt($count,&label("finish")); | ||
| 82 | &ld($a0,&QWPw(0,$ap)); | ||
| 83 | &ld($b0,&QWPw(0,$bp)); | ||
| 84 | &br(&label("loop")); | ||
| 85 | EOF | ||
| 86 | ################################################## | ||
| 87 | # Do the last 0..3 words | ||
| 88 | |||
| 89 | &set_label("last_loop"); | ||
| 90 | |||
| 91 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 92 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b | ||
| 93 | &mul($a0,$word,($l0)=&NR(1)); | ||
| 94 | &sub($count,1,$count); | ||
| 95 | &add($ap,$QWS,$ap); | ||
| 96 | &muh($a0,$word,($h0)=&NR(1)); &FR($a0); | ||
| 97 | &add($r0,$l0,$r0); | ||
| 98 | &add($rp,$QWS,$rp); | ||
| 99 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); | ||
| 100 | &add($r0,$cc,$r0); | ||
| 101 | &add($h0,$t0,$h0); &FR($t0); | ||
| 102 | &cmpult($r0,$cc,$cc); | ||
| 103 | &st($r0,&QWPw(-1,$rp)); &FR($r0); | ||
| 104 | &add($h0,$cc,$cc); &FR($h0); | ||
| 105 | |||
| 106 | &bgt($count,&label("last_loop")); | ||
| 107 | &function_end_A($name); | ||
| 108 | |||
| 109 | ###################################################### | ||
| 110 | &set_label("finish"); | ||
| 111 | &add($count,4,$count); | ||
| 112 | &bgt($count,&label("last_loop")); | ||
| 113 | |||
| 114 | &set_label("end"); | ||
| 115 | &function_end($name); | ||
| 116 | |||
| 117 | &fin_pool; | ||
| 118 | } | ||
| 119 | |||
| 120 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl new file mode 100644 index 0000000000..5efd201281 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl | |||
| @@ -0,0 +1,213 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub mul_add_c | ||
| 5 | { | ||
| 6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | |||
| 9 | &mul($a,$b,($l1)=&NR(1)); | ||
| 10 | &muh($a,$b,($h1)=&NR(1)); | ||
| 11 | &add($c0,$l1,$c0); | ||
| 12 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); | ||
| 13 | &add($t1,$h1,$h1); &FR($t1); | ||
| 14 | &add($c1,$h1,$c1); | ||
| 15 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); | ||
| 16 | &add($c2,$t2,$c2); &FR($t2); | ||
| 17 | } | ||
| 18 | |||
| 19 | sub bn_mul_comba4 | ||
| 20 | { | ||
| 21 | local($name)=@_; | ||
| 22 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 23 | |||
| 24 | $cnt=1; | ||
| 25 | &init_pool(3); | ||
| 26 | |||
| 27 | $rp=&wparam(0); | ||
| 28 | $ap=&wparam(1); | ||
| 29 | $bp=&wparam(2); | ||
| 30 | |||
| 31 | &function_begin($name,""); | ||
| 32 | |||
| 33 | &comment(""); | ||
| 34 | |||
| 35 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 36 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 37 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 38 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 39 | &mul($a[0],$b[0],($r00)=&NR(1)); | ||
| 40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 42 | &muh($a[0],$b[0],($r01)=&NR(1)); | ||
| 43 | &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 44 | &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
| 45 | &mul($a[0],$b[1],($r02)=&NR(1)); | ||
| 46 | |||
| 47 | ($R,$H1,$H2)=&NR(3); | ||
| 48 | |||
| 49 | &st($r00,&QWPw(0,$rp)); &FR($r00); | ||
| 50 | |||
| 51 | &mov("zero",$R); | ||
| 52 | &mul($a[1],$b[0],($r03)=&NR(1)); | ||
| 53 | |||
| 54 | &mov("zero",$H1); | ||
| 55 | &mov("zero",$H0); | ||
| 56 | &add($R,$r01,$R); | ||
| 57 | &muh($a[0],$b[1],($r04)=&NR(1)); | ||
| 58 | &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); | ||
| 59 | &add($R,$r02,$R); | ||
| 60 | &add($H1,$t01,$H1) &FR($t01); | ||
| 61 | &muh($a[1],$b[0],($r05)=&NR(1)); | ||
| 62 | &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); | ||
| 63 | &add($R,$r03,$R); | ||
| 64 | &add($H2,$t02,$H2) &FR($t02); | ||
| 65 | &mul($a[0],$b[2],($r06)=&NR(1)); | ||
| 66 | &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); | ||
| 67 | &add($H1,$t03,$H1) &FR($t03); | ||
| 68 | &st($R,&QWPw(1,$rp)); | ||
| 69 | &add($H1,$H2,$R); | ||
| 70 | |||
| 71 | &mov("zero",$H1); | ||
| 72 | &add($R,$r04,$R); | ||
| 73 | &mov("zero",$H2); | ||
| 74 | &mul($a[1],$b[1],($r07)=&NR(1)); | ||
| 75 | &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); | ||
| 76 | &add($R,$r05,$R); | ||
| 77 | &add($H1,$t04,$H1) &FR($t04); | ||
| 78 | &mul($a[2],$b[0],($r08)=&NR(1)); | ||
| 79 | &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); | ||
| 80 | &add($R,$r01,$R); | ||
| 81 | &add($H2,$t05,$H2) &FR($t05); | ||
| 82 | &muh($a[0],$b[2],($r09)=&NR(1)); | ||
| 83 | &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); | ||
| 84 | &add($R,$r07,$R); | ||
| 85 | &add($H1,$t06,$H1) &FR($t06); | ||
| 86 | &muh($a[1],$b[1],($r10)=&NR(1)); | ||
| 87 | &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); | ||
| 88 | &add($R,$r08,$R); | ||
| 89 | &add($H2,$t07,$H2) &FR($t07); | ||
| 90 | &muh($a[2],$b[0],($r11)=&NR(1)); | ||
| 91 | &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); | ||
| 92 | &add($H1,$t08,$H1) &FR($t08); | ||
| 93 | &st($R,&QWPw(2,$rp)); | ||
| 94 | &add($H1,$H2,$R); | ||
| 95 | |||
| 96 | &mov("zero",$H1); | ||
| 97 | &add($R,$r09,$R); | ||
| 98 | &mov("zero",$H2); | ||
| 99 | &mul($a[0],$b[3],($r12)=&NR(1)); | ||
| 100 | &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); | ||
| 101 | &add($R,$r10,$R); | ||
| 102 | &add($H1,$t09,$H1) &FR($t09); | ||
| 103 | &mul($a[1],$b[2],($r13)=&NR(1)); | ||
| 104 | &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); | ||
| 105 | &add($R,$r11,$R); | ||
| 106 | &add($H1,$t10,$H1) &FR($t10); | ||
| 107 | &mul($a[2],$b[1],($r14)=&NR(1)); | ||
| 108 | &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); | ||
| 109 | &add($R,$r12,$R); | ||
| 110 | &add($H1,$t11,$H1) &FR($t11); | ||
| 111 | &mul($a[3],$b[0],($r15)=&NR(1)); | ||
| 112 | &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); | ||
| 113 | &add($R,$r13,$R); | ||
| 114 | &add($H1,$t12,$H1) &FR($t12); | ||
| 115 | &muh($a[0],$b[3],($r16)=&NR(1)); | ||
| 116 | &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); | ||
| 117 | &add($R,$r14,$R); | ||
| 118 | &add($H1,$t13,$H1) &FR($t13); | ||
| 119 | &muh($a[1],$b[2],($r17)=&NR(1)); | ||
| 120 | &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); | ||
| 121 | &add($R,$r15,$R); | ||
| 122 | &add($H1,$t14,$H1) &FR($t14); | ||
| 123 | &muh($a[2],$b[1],($r18)=&NR(1)); | ||
| 124 | &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); | ||
| 125 | &add($H1,$t15,$H1) &FR($t15); | ||
| 126 | &st($R,&QWPw(3,$rp)); | ||
| 127 | &add($H1,$H2,$R); | ||
| 128 | |||
| 129 | &mov("zero",$H1); | ||
| 130 | &add($R,$r16,$R); | ||
| 131 | &mov("zero",$H2); | ||
| 132 | &muh($a[3],$b[0],($r19)=&NR(1)); | ||
| 133 | &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); | ||
| 134 | &add($R,$r17,$R); | ||
| 135 | &add($H1,$t16,$H1) &FR($t16); | ||
| 136 | &mul($a[1],$b[3],($r20)=&NR(1)); | ||
| 137 | &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); | ||
| 138 | &add($R,$r18,$R); | ||
| 139 | &add($H1,$t17,$H1) &FR($t17); | ||
| 140 | &mul($a[2],$b[2],($r21)=&NR(1)); | ||
| 141 | &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); | ||
| 142 | &add($R,$r19,$R); | ||
| 143 | &add($H1,$t18,$H1) &FR($t18); | ||
| 144 | &mul($a[3],$b[1],($r22)=&NR(1)); | ||
| 145 | &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); | ||
| 146 | &add($R,$r20,$R); | ||
| 147 | &add($H1,$t19,$H1) &FR($t19); | ||
| 148 | &muh($a[1],$b[3],($r23)=&NR(1)); | ||
| 149 | &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); | ||
| 150 | &add($R,$r21,$R); | ||
| 151 | &add($H1,$t20,$H1) &FR($t20); | ||
| 152 | &muh($a[2],$b[2],($r24)=&NR(1)); | ||
| 153 | &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); | ||
| 154 | &add($R,$r22,$R); | ||
| 155 | &add($H1,$t21,$H1) &FR($t21); | ||
| 156 | &muh($a[3],$b[1],($r25)=&NR(1)); | ||
| 157 | &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); | ||
| 158 | &add($H1,$t22,$H1) &FR($t22); | ||
| 159 | &st($R,&QWPw(4,$rp)); | ||
| 160 | &add($H1,$H2,$R); | ||
| 161 | |||
| 162 | &mov("zero",$H1); | ||
| 163 | &add($R,$r23,$R); | ||
| 164 | &mov("zero",$H2); | ||
| 165 | &mul($a[2],$b[3],($r26)=&NR(1)); | ||
| 166 | &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); | ||
| 167 | &add($R,$r24,$R); | ||
| 168 | &add($H1,$t23,$H1) &FR($t23); | ||
| 169 | &mul($a[3],$b[2],($r27)=&NR(1)); | ||
| 170 | &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); | ||
| 171 | &add($R,$r25,$R); | ||
| 172 | &add($H1,$t24,$H1) &FR($t24); | ||
| 173 | &muh($a[2],$b[3],($r28)=&NR(1)); | ||
| 174 | &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); | ||
| 175 | &add($R,$r26,$R); | ||
| 176 | &add($H1,$t25,$H1) &FR($t25); | ||
| 177 | &muh($a[3],$b[2],($r29)=&NR(1)); | ||
| 178 | &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); | ||
| 179 | &add($R,$r27,$R); | ||
| 180 | &add($H1,$t26,$H1) &FR($t26); | ||
| 181 | &mul($a[3],$b[3],($r30)=&NR(1)); | ||
| 182 | &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); | ||
| 183 | &add($H1,$t27,$H1) &FR($t27); | ||
| 184 | &st($R,&QWPw(5,$rp)); | ||
| 185 | &add($H1,$H2,$R); | ||
| 186 | |||
| 187 | &mov("zero",$H1); | ||
| 188 | &add($R,$r28,$R); | ||
| 189 | &mov("zero",$H2); | ||
| 190 | &muh($a[3],$b[3],($r31)=&NR(1)); | ||
| 191 | &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); | ||
| 192 | &add($R,$r29,$R); | ||
| 193 | &add($H1,$t28,$H1) &FR($t28); | ||
| 194 | ############ | ||
| 195 | &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); | ||
| 196 | &add($R,$r30,$R); | ||
| 197 | &add($H1,$t29,$H1) &FR($t29); | ||
| 198 | ############ | ||
| 199 | &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); | ||
| 200 | &add($H1,$t30,$H1) &FR($t30); | ||
| 201 | &st($R,&QWPw(6,$rp)); | ||
| 202 | &add($H1,$H2,$R); | ||
| 203 | |||
| 204 | &add($R,$r31,$R); &FR($r31); | ||
| 205 | &st($R,&QWPw(7,$rp)); | ||
| 206 | |||
| 207 | &FR($R,$H1,$H2); | ||
| 208 | &function_end($name); | ||
| 209 | |||
| 210 | &fin_pool; | ||
| 211 | } | ||
| 212 | |||
| 213 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl new file mode 100644 index 0000000000..79d86dd25c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub mul_add_c | ||
| 5 | { | ||
| 6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | |||
| 9 | print STDERR "count=$cnt\n"; $cnt++; | ||
| 10 | &mul($a,$b,($l1)=&NR(1)); | ||
| 11 | &muh($a,$b,($h1)=&NR(1)); | ||
| 12 | &add($c0,$l1,$c0); | ||
| 13 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); | ||
| 14 | &add($t1,$h1,$h1); &FR($t1); | ||
| 15 | &add($c1,$h1,$c1); | ||
| 16 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); | ||
| 17 | &add($c2,$t2,$c2); &FR($t2); | ||
| 18 | } | ||
| 19 | |||
| 20 | sub bn_mul_comba4 | ||
| 21 | { | ||
| 22 | local($name)=@_; | ||
| 23 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 24 | |||
| 25 | $cnt=1; | ||
| 26 | &init_pool(3); | ||
| 27 | |||
| 28 | $rp=&wparam(0); | ||
| 29 | $ap=&wparam(1); | ||
| 30 | $bp=&wparam(2); | ||
| 31 | |||
| 32 | &function_begin($name,""); | ||
| 33 | |||
| 34 | &comment(""); | ||
| 35 | |||
| 36 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 37 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 38 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 39 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 42 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
| 43 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); | ||
| 44 | |||
| 45 | ($c0,$c1,$c2)=&NR(3); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | &mul($a[0],$b[0],$c0); | ||
| 48 | &muh($a[0],$b[0],$c1); | ||
| 49 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 50 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 51 | &mov("zero",$c2); | ||
| 52 | |||
| 53 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
| 54 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
| 55 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 56 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 57 | &mov("zero",$c2); | ||
| 58 | |||
| 59 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
| 60 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
| 61 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
| 62 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 63 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 64 | &mov("zero",$c2); | ||
| 65 | |||
| 66 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); | ||
| 67 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
| 68 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
| 69 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
| 70 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 71 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 72 | &mov("zero",$c2); | ||
| 73 | |||
| 74 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); | ||
| 75 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
| 76 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
| 77 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 78 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 79 | &mov("zero",$c2); | ||
| 80 | |||
| 81 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); | ||
| 82 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
| 83 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 84 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 85 | &mov("zero",$c2); | ||
| 86 | |||
| 87 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); | ||
| 88 | &st($c0,&QWPw(6,$rp)); | ||
| 89 | &st($c1,&QWPw(7,$rp)); | ||
| 90 | |||
| 91 | &FR($c0,$c1,$c2); | ||
| 92 | |||
| 93 | &function_end($name); | ||
| 94 | |||
| 95 | &fin_pool; | ||
| 96 | } | ||
| 97 | |||
| 98 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl new file mode 100644 index 0000000000..525ca7494b --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl | |||
| @@ -0,0 +1,177 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_mul_comba8 | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 8 | |||
| 9 | $cnt=1; | ||
| 10 | &init_pool(3); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | |||
| 16 | &function_begin($name,""); | ||
| 17 | |||
| 18 | &comment(""); | ||
| 19 | |||
| 20 | &stack_push(2); | ||
| 21 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 22 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 23 | &st($reg_s0,&swtmp(0)); &FR($reg_s0); | ||
| 24 | &st($reg_s1,&swtmp(1)); &FR($reg_s1); | ||
| 25 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 26 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 27 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 28 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 29 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 30 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
| 31 | &ld(($a[4])=&NR(1),&QWPw(1,$ap)); | ||
| 32 | &ld(($b[4])=&NR(1),&QWPw(1,$bp)); | ||
| 33 | &ld(($a[5])=&NR(1),&QWPw(1,$ap)); | ||
| 34 | &ld(($b[5])=&NR(1),&QWPw(1,$bp)); | ||
| 35 | &ld(($a[6])=&NR(1),&QWPw(1,$ap)); | ||
| 36 | &ld(($b[6])=&NR(1),&QWPw(1,$bp)); | ||
| 37 | &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap); | ||
| 38 | &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp); | ||
| 39 | |||
| 40 | ($c0,$c1,$c2)=&NR(3); | ||
| 41 | &mov("zero",$c2); | ||
| 42 | &mul($a[0],$b[0],$c0); | ||
| 43 | &muh($a[0],$b[0],$c1); | ||
| 44 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | |||
| 48 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
| 49 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
| 50 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 52 | &mov("zero",$c2); | ||
| 53 | |||
| 54 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
| 55 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
| 56 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
| 57 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 59 | &mov("zero",$c2); | ||
| 60 | |||
| 61 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); | ||
| 62 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
| 63 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
| 64 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); | ||
| 65 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 66 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 67 | &mov("zero",$c2); | ||
| 68 | |||
| 69 | &mul_add_c($a[0],$b[4],$c0,$c1,$c2); | ||
| 70 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); | ||
| 71 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
| 72 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); | ||
| 73 | &mul_add_c($a[4],$b[0],$c0,$c1,$c2); | ||
| 74 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 75 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 76 | &mov("zero",$c2); | ||
| 77 | |||
| 78 | &mul_add_c($a[0],$b[5],$c0,$c1,$c2); | ||
| 79 | &mul_add_c($a[1],$b[4],$c0,$c1,$c2); | ||
| 80 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); | ||
| 81 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); | ||
| 82 | &mul_add_c($a[4],$b[1],$c0,$c1,$c2); | ||
| 83 | &mul_add_c($a[5],$b[0],$c0,$c1,$c2); | ||
| 84 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 85 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 86 | &mov("zero",$c2); | ||
| 87 | |||
| 88 | &mul_add_c($a[0],$b[6],$c0,$c1,$c2); | ||
| 89 | &mul_add_c($a[1],$b[5],$c0,$c1,$c2); | ||
| 90 | &mul_add_c($a[2],$b[4],$c0,$c1,$c2); | ||
| 91 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); | ||
| 92 | &mul_add_c($a[4],$b[2],$c0,$c1,$c2); | ||
| 93 | &mul_add_c($a[5],$b[1],$c0,$c1,$c2); | ||
| 94 | &mul_add_c($a[6],$b[0],$c0,$c1,$c2); | ||
| 95 | &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 97 | &mov("zero",$c2); | ||
| 98 | |||
| 99 | &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); | ||
| 100 | &mul_add_c($a[1],$b[6],$c0,$c1,$c2); | ||
| 101 | &mul_add_c($a[2],$b[5],$c0,$c1,$c2); | ||
| 102 | &mul_add_c($a[3],$b[4],$c0,$c1,$c2); | ||
| 103 | &mul_add_c($a[4],$b[3],$c0,$c1,$c2); | ||
| 104 | &mul_add_c($a[5],$b[2],$c0,$c1,$c2); | ||
| 105 | &mul_add_c($a[6],$b[1],$c0,$c1,$c2); | ||
| 106 | &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
| 107 | &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 108 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 109 | &mov("zero",$c2); | ||
| 110 | |||
| 111 | &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); | ||
| 112 | &mul_add_c($a[2],$b[6],$c0,$c1,$c2); | ||
| 113 | &mul_add_c($a[3],$b[5],$c0,$c1,$c2); | ||
| 114 | &mul_add_c($a[4],$b[4],$c0,$c1,$c2); | ||
| 115 | &mul_add_c($a[5],$b[3],$c0,$c1,$c2); | ||
| 116 | &mul_add_c($a[6],$b[2],$c0,$c1,$c2); | ||
| 117 | &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
| 118 | &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 119 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 120 | &mov("zero",$c2); | ||
| 121 | |||
| 122 | &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); | ||
| 123 | &mul_add_c($a[3],$b[6],$c0,$c1,$c2); | ||
| 124 | &mul_add_c($a[4],$b[5],$c0,$c1,$c2); | ||
| 125 | &mul_add_c($a[5],$b[4],$c0,$c1,$c2); | ||
| 126 | &mul_add_c($a[6],$b[3],$c0,$c1,$c2); | ||
| 127 | &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
| 128 | &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 129 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 130 | &mov("zero",$c2); | ||
| 131 | |||
| 132 | &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); | ||
| 133 | &mul_add_c($a[4],$b[6],$c0,$c1,$c2); | ||
| 134 | &mul_add_c($a[5],$b[5],$c0,$c1,$c2); | ||
| 135 | &mul_add_c($a[6],$b[4],$c0,$c1,$c2); | ||
| 136 | &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); | ||
| 137 | &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 138 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 139 | &mov("zero",$c2); | ||
| 140 | |||
| 141 | &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); | ||
| 142 | &mul_add_c($a[5],$b[6],$c0,$c1,$c2); | ||
| 143 | &mul_add_c($a[6],$b[5],$c0,$c1,$c2); | ||
| 144 | &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); | ||
| 145 | &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 146 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 147 | &mov("zero",$c2); | ||
| 148 | |||
| 149 | &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); | ||
| 150 | &mul_add_c($a[6],$b[6],$c0,$c1,$c2); | ||
| 151 | &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); | ||
| 152 | &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 153 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 154 | &mov("zero",$c2); | ||
| 155 | |||
| 156 | &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); | ||
| 157 | &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); | ||
| 158 | &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 159 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 160 | &mov("zero",$c2); | ||
| 161 | |||
| 162 | &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); | ||
| 163 | &st($c0,&QWPw(14,$rp)); | ||
| 164 | &st($c1,&QWPw(15,$rp)); | ||
| 165 | |||
| 166 | &FR($c0,$c1,$c2); | ||
| 167 | |||
| 168 | &ld($reg_s0,&swtmp(0)); | ||
| 169 | &ld($reg_s1,&swtmp(1)); | ||
| 170 | &stack_pop(2); | ||
| 171 | |||
| 172 | &function_end($name); | ||
| 173 | |||
| 174 | &fin_pool; | ||
| 175 | } | ||
| 176 | |||
| 177 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl new file mode 100644 index 0000000000..a55b696906 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl | |||
| @@ -0,0 +1,113 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_sqr_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); | ||
| 8 | |||
| 9 | &init_pool(3); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | |||
| 16 | &function_begin($name,""); | ||
| 17 | |||
| 18 | &comment(""); | ||
| 19 | &sub($count,4,$count); | ||
| 20 | &mov("zero",$cc); | ||
| 21 | &br(&label("finish")); | ||
| 22 | &blt($count,&label("finish")); | ||
| 23 | |||
| 24 | ($a0,$r0)=&NR(2); | ||
| 25 | &ld($a0,&QWPw(0,$ap)); | ||
| 26 | &ld($r0,&QWPw(0,$rp)); | ||
| 27 | |||
| 28 | $a=<<'EOF'; | ||
| 29 | ########################################################## | ||
| 30 | &set_label("loop"); | ||
| 31 | |||
| 32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 38 | |||
| 39 | ($o0,$t0)=&NR(2); | ||
| 40 | &add($a0,$b0,$o0); | ||
| 41 | &cmpult($o0,$b0,$t0); | ||
| 42 | &add($o0,$cc,$o0); | ||
| 43 | &cmpult($o0,$cc,$cc); | ||
| 44 | &add($cc,$t0,$cc); &FR($t0); | ||
| 45 | |||
| 46 | ($t1,$o1)=&NR(2); | ||
| 47 | |||
| 48 | &add($a1,$b1,$o1); &FR($a1); | ||
| 49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 50 | &add($o1,$cc,$o1); | ||
| 51 | &cmpult($o1,$cc,$cc); | ||
| 52 | &add($cc,$t1,$cc); &FR($t1); | ||
| 53 | |||
| 54 | ($t2,$o2)=&NR(2); | ||
| 55 | |||
| 56 | &add($a2,$b2,$o2); &FR($a2); | ||
| 57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 58 | &add($o2,$cc,$o2); | ||
| 59 | &cmpult($o2,$cc,$cc); | ||
| 60 | &add($cc,$t2,$cc); &FR($t2); | ||
| 61 | |||
| 62 | ($t3,$o3)=&NR(2); | ||
| 63 | |||
| 64 | &add($a3,$b3,$o3); &FR($a3); | ||
| 65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 66 | &add($o3,$cc,$o3); | ||
| 67 | &cmpult($o3,$cc,$cc); | ||
| 68 | &add($cc,$t3,$cc); &FR($t3); | ||
| 69 | |||
| 70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 71 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 72 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 73 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 74 | |||
| 75 | &sub($count,4,$count); # count-=4 | ||
| 76 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 77 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 78 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 79 | |||
| 80 | &blt($count,&label("finish")); | ||
| 81 | &ld($a0,&QWPw(0,$ap)); | ||
| 82 | &ld($b0,&QWPw(0,$bp)); | ||
| 83 | &br(&label("loop")); | ||
| 84 | EOF | ||
| 85 | ################################################## | ||
| 86 | # Do the last 0..3 words | ||
| 87 | |||
| 88 | &set_label("last_loop"); | ||
| 89 | |||
| 90 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 91 | &mul($a0,$a0,($l0)=&NR(1)); | ||
| 92 | &add($ap,$QWS,$ap); | ||
| 93 | &add($rp,2*$QWS,$rp); | ||
| 94 | &sub($count,1,$count); | ||
| 95 | &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); | ||
| 96 | &st($l0,&QWPw(-2,$rp)); &FR($l0); | ||
| 97 | &st($h0,&QWPw(-1,$rp)); &FR($h0); | ||
| 98 | |||
| 99 | &bgt($count,&label("last_loop")); | ||
| 100 | &function_end_A($name); | ||
| 101 | |||
| 102 | ###################################################### | ||
| 103 | &set_label("finish"); | ||
| 104 | &add($count,4,$count); | ||
| 105 | &bgt($count,&label("last_loop")); | ||
| 106 | |||
| 107 | &set_label("end"); | ||
| 108 | &function_end($name); | ||
| 109 | |||
| 110 | &fin_pool; | ||
| 111 | } | ||
| 112 | |||
| 113 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl new file mode 100644 index 0000000000..bf33f5b503 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub sqr_add_c | ||
| 5 | { | ||
| 6 | local($a,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | |||
| 9 | &mul($a,$a,($l1)=&NR(1)); | ||
| 10 | &muh($a,$a,($h1)=&NR(1)); | ||
| 11 | &add($c0,$l1,$c0); | ||
| 12 | &add($c1,$h1,$c1); | ||
| 13 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); | ||
| 14 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); | ||
| 15 | &add($c1,$t1,$c1); &FR($t1); | ||
| 16 | &add($c2,$t2,$c2); &FR($t2); | ||
| 17 | } | ||
| 18 | |||
| 19 | sub sqr_add_c2 | ||
| 20 | { | ||
| 21 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 22 | local($l1,$h1,$t1,$t2); | ||
| 23 | |||
| 24 | &mul($a,$b,($l1)=&NR(1)); | ||
| 25 | &muh($a,$b,($h1)=&NR(1)); | ||
| 26 | &cmplt($l1,"zero",($lc1)=&NR(1)); | ||
| 27 | &cmplt($h1,"zero",($hc1)=&NR(1)); | ||
| 28 | &add($l1,$l1,$l1); | ||
| 29 | &add($h1,$h1,$h1); | ||
| 30 | &add($h1,$lc1,$h1); &FR($lc1); | ||
| 31 | &add($c2,$hc1,$c2); &FR($hc1); | ||
| 32 | |||
| 33 | &add($c0,$l1,$c0); | ||
| 34 | &add($c1,$h1,$c1); | ||
| 35 | &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); | ||
| 36 | &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); | ||
| 37 | |||
| 38 | &add($c1,$lc1,$c1); &FR($lc1); | ||
| 39 | &add($c2,$hc1,$c2); &FR($hc1); | ||
| 40 | } | ||
| 41 | |||
| 42 | |||
| 43 | sub bn_sqr_comba4 | ||
| 44 | { | ||
| 45 | local($name)=@_; | ||
| 46 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 47 | |||
| 48 | $cnt=1; | ||
| 49 | &init_pool(2); | ||
| 50 | |||
| 51 | $rp=&wparam(0); | ||
| 52 | $ap=&wparam(1); | ||
| 53 | |||
| 54 | &function_begin($name,""); | ||
| 55 | |||
| 56 | &comment(""); | ||
| 57 | |||
| 58 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 59 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 60 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 61 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
| 62 | |||
| 63 | ($c0,$c1,$c2)=&NR(3); | ||
| 64 | |||
| 65 | &mov("zero",$c2); | ||
| 66 | &mul($a[0],$a[0],$c0); | ||
| 67 | &muh($a[0],$a[0],$c1); | ||
| 68 | &st($c0,&QWPw(0,$rp)); | ||
| 69 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 70 | &mov("zero",$c2); | ||
| 71 | |||
| 72 | &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); | ||
| 73 | &st($c0,&QWPw(1,$rp)); | ||
| 74 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 75 | &mov("zero",$c2); | ||
| 76 | |||
| 77 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
| 78 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
| 79 | &st($c0,&QWPw(2,$rp)); | ||
| 80 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 81 | &mov("zero",$c2); | ||
| 82 | |||
| 83 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
| 84 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
| 85 | &st($c0,&QWPw(3,$rp)); | ||
| 86 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 87 | &mov("zero",$c2); | ||
| 88 | |||
| 89 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
| 90 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
| 91 | &st($c0,&QWPw(4,$rp)); | ||
| 92 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 93 | &mov("zero",$c2); | ||
| 94 | |||
| 95 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
| 96 | &st($c0,&QWPw(5,$rp)); | ||
| 97 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 98 | &mov("zero",$c2); | ||
| 99 | |||
| 100 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
| 101 | &st($c0,&QWPw(6,$rp)); | ||
| 102 | &st($c1,&QWPw(7,$rp)); | ||
| 103 | |||
| 104 | &function_end($name); | ||
| 105 | |||
| 106 | &fin_pool; | ||
| 107 | } | ||
| 108 | |||
| 109 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl new file mode 100644 index 0000000000..b4afe085f1 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl | |||
| @@ -0,0 +1,132 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_sqr_comba8 | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 8 | |||
| 9 | $cnt=1; | ||
| 10 | &init_pool(2); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | |||
| 15 | &function_begin($name,""); | ||
| 16 | |||
| 17 | &comment(""); | ||
| 18 | |||
| 19 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 20 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 21 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 22 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 23 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); | ||
| 24 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
| 25 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
| 26 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
| 27 | |||
| 28 | ($c0,$c1,$c2)=&NR(3); | ||
| 29 | |||
| 30 | &mov("zero",$c2); | ||
| 31 | &mul($a[0],$a[0],$c0); | ||
| 32 | &muh($a[0],$a[0],$c1); | ||
| 33 | &st($c0,&QWPw(0,$rp)); | ||
| 34 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 35 | &mov("zero",$c2); | ||
| 36 | |||
| 37 | &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); | ||
| 38 | &st($c0,&QWPw(1,$rp)); | ||
| 39 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 40 | &mov("zero",$c2); | ||
| 41 | |||
| 42 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
| 43 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
| 44 | &st($c0,&QWPw(2,$rp)); | ||
| 45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | |||
| 48 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
| 49 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
| 50 | &st($c0,&QWPw(3,$rp)); | ||
| 51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 52 | &mov("zero",$c2); | ||
| 53 | |||
| 54 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
| 55 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
| 56 | &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); | ||
| 57 | &st($c0,&QWPw(4,$rp)); | ||
| 58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 59 | &mov("zero",$c2); | ||
| 60 | |||
| 61 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
| 62 | &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); | ||
| 63 | &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); | ||
| 64 | &st($c0,&QWPw(5,$rp)); | ||
| 65 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 66 | &mov("zero",$c2); | ||
| 67 | |||
| 68 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
| 69 | &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); | ||
| 70 | &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); | ||
| 71 | &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); | ||
| 72 | &st($c0,&QWPw(6,$rp)); | ||
| 73 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 74 | &mov("zero",$c2); | ||
| 75 | |||
| 76 | &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); | ||
| 77 | &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); | ||
| 78 | &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); | ||
| 79 | &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); | ||
| 80 | &st($c0,&QWPw(7,$rp)); | ||
| 81 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 82 | &mov("zero",$c2); | ||
| 83 | |||
| 84 | &sqr_add_c($a[4],$c0,$c1,$c2); | ||
| 85 | &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); | ||
| 86 | &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); | ||
| 87 | &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); | ||
| 88 | &st($c0,&QWPw(8,$rp)); | ||
| 89 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 90 | &mov("zero",$c2); | ||
| 91 | |||
| 92 | &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); | ||
| 93 | &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); | ||
| 94 | &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); | ||
| 95 | &st($c0,&QWPw(9,$rp)); | ||
| 96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 97 | &mov("zero",$c2); | ||
| 98 | |||
| 99 | &sqr_add_c($a[5],$c0,$c1,$c2); | ||
| 100 | &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); | ||
| 101 | &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); | ||
| 102 | &st($c0,&QWPw(10,$rp)); | ||
| 103 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 104 | &mov("zero",$c2); | ||
| 105 | |||
| 106 | &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); | ||
| 107 | &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); | ||
| 108 | &st($c0,&QWPw(11,$rp)); | ||
| 109 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 110 | &mov("zero",$c2); | ||
| 111 | |||
| 112 | &sqr_add_c($a[6],$c0,$c1,$c2); | ||
| 113 | &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); | ||
| 114 | &st($c0,&QWPw(12,$rp)); | ||
| 115 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 116 | &mov("zero",$c2); | ||
| 117 | |||
| 118 | &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); | ||
| 119 | &st($c0,&QWPw(13,$rp)); | ||
| 120 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 121 | &mov("zero",$c2); | ||
| 122 | |||
| 123 | &sqr_add_c($a[7],$c0,$c1,$c2); | ||
| 124 | &st($c0,&QWPw(14,$rp)); | ||
| 125 | &st($c1,&QWPw(15,$rp)); | ||
| 126 | |||
| 127 | &function_end($name); | ||
| 128 | |||
| 129 | &fin_pool; | ||
| 130 | } | ||
| 131 | |||
| 132 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sub.pl b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl new file mode 100644 index 0000000000..d998da5c21 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_sub_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | $count=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | &blt($count,&label("finish")); | ||
| 23 | |||
| 24 | ($a0,$b0)=&NR(2); | ||
| 25 | &ld($a0,&QWPw(0,$ap)); | ||
| 26 | &ld($b0,&QWPw(0,$bp)); | ||
| 27 | |||
| 28 | ########################################################## | ||
| 29 | &set_label("loop"); | ||
| 30 | |||
| 31 | ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); | ||
| 32 | &ld($a1,&QWPw(1,$ap)); | ||
| 33 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
| 34 | &ld($b1,&QWPw(1,$bp)); | ||
| 35 | &sub($a0,$b0,$a0); # do the subtract | ||
| 36 | &ld($a2,&QWPw(2,$ap)); | ||
| 37 | &cmpult($a0,$cc,$b0); # will we borrow? | ||
| 38 | &ld($b2,&QWPw(2,$bp)); | ||
| 39 | &sub($a0,$cc,$o0); # will we borrow? | ||
| 40 | &ld($a3,&QWPw(3,$ap)); | ||
| 41 | &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); | ||
| 42 | |||
| 43 | &cmpult($a1,$b1,$t1); # will we borrow? | ||
| 44 | &sub($a1,$b1,$a1); # do the subtract | ||
| 45 | &ld($b3,&QWPw(3,$bp)); | ||
| 46 | &cmpult($a1,$cc,$b1); # will we borrow? | ||
| 47 | &sub($a1,$cc,$o1); # will we borrow? | ||
| 48 | &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); | ||
| 49 | |||
| 50 | &cmpult($a2,$b2,$tmp); # will we borrow? | ||
| 51 | &sub($a2,$b2,$a2); # do the subtract | ||
| 52 | &st($o0,&QWPw(0,$rp)); &FR($o0); # save | ||
| 53 | &cmpult($a2,$cc,$b2); # will we borrow? | ||
| 54 | &sub($a2,$cc,$o2); # will we borrow? | ||
| 55 | &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); | ||
| 56 | |||
| 57 | &cmpult($a3,$b3,$t3); # will we borrow? | ||
| 58 | &sub($a3,$b3,$a3); # do the subtract | ||
| 59 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
| 60 | &cmpult($a3,$cc,$b3); # will we borrow? | ||
| 61 | &sub($a3,$cc,$o3); # will we borrow? | ||
| 62 | &add($b3,$t3,$cc); &FR($t3,$a3,$b3); | ||
| 63 | |||
| 64 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
| 65 | &sub($count,4,$count); # count-=4 | ||
| 66 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
| 67 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 68 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 69 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 70 | |||
| 71 | &blt($count,&label("finish")); | ||
| 72 | &ld($a0,&QWPw(0,$ap)); | ||
| 73 | &ld($b0,&QWPw(0,$bp)); | ||
| 74 | &br(&label("loop")); | ||
| 75 | ################################################## | ||
| 76 | # Do the last 0..3 words | ||
| 77 | |||
| 78 | &set_label("last_loop"); | ||
| 79 | |||
| 80 | &ld($a0,&QWPw(0,$ap)); # get a | ||
| 81 | &ld($b0,&QWPw(0,$bp)); # get b | ||
| 82 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
| 83 | &sub($a0,$b0,$a0); # do the subtract | ||
| 84 | &cmpult($a0,$cc,$b0); # will we borrow? | ||
| 85 | &sub($a0,$cc,$a0); # will we borrow? | ||
| 86 | &st($a0,&QWPw(0,$rp)); # save | ||
| 87 | &add($b0,$tmp,$cc); # add the borrows | ||
| 88 | |||
| 89 | &add($ap,$QWS,$ap); | ||
| 90 | &add($bp,$QWS,$bp); | ||
| 91 | &add($rp,$QWS,$rp); | ||
| 92 | &sub($count,1,$count); | ||
| 93 | &bgt($count,&label("last_loop")); | ||
| 94 | &function_end_A($name); | ||
| 95 | |||
| 96 | ###################################################### | ||
| 97 | &set_label("finish"); | ||
| 98 | &add($count,4,$count); | ||
| 99 | &bgt($count,&label("last_loop")); | ||
| 100 | |||
| 101 | &FR($a0,$b0); | ||
| 102 | &set_label("end"); | ||
| 103 | &function_end($name); | ||
| 104 | |||
| 105 | &fin_pool; | ||
| 106 | } | ||
| 107 | |||
| 108 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/add.pl b/src/lib/libcrypto/bn/asm/alpha/add.pl new file mode 100644 index 0000000000..13bf516428 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/add.pl | |||
| @@ -0,0 +1,118 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_add_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | $count=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | &blt($count,&label("finish")); | ||
| 23 | |||
| 24 | ($a0,$b0)=&NR(2); | ||
| 25 | |||
| 26 | ########################################################## | ||
| 27 | &set_label("loop"); | ||
| 28 | |||
| 29 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); | ||
| 30 | &ld(($b0)=&NR(1),&QWPw(0,$bp)); | ||
| 31 | &ld(($a1)=&NR(1),&QWPw(1,$ap)); | ||
| 32 | &ld(($b1)=&NR(1),&QWPw(1,$bp)); | ||
| 33 | |||
| 34 | ($o0,$t0)=&NR(2); | ||
| 35 | &add($a0,$b0,$o0); | ||
| 36 | &ld(($a2)=&NR(1),&QWPw(2,$ap)); | ||
| 37 | &cmpult($o0,$b0,$t0); | ||
| 38 | &add($o0,$cc,$o0); | ||
| 39 | &cmpult($o0,$cc,$cc); | ||
| 40 | &ld(($b2)=&NR(1),&QWPw(2,$bp)); | ||
| 41 | &add($cc,$t0,$cc); &FR($t0); | ||
| 42 | |||
| 43 | ($t1,$o1)=&NR(2); | ||
| 44 | |||
| 45 | &add($a1,$b1,$o1); &FR($a1); | ||
| 46 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 47 | &add($o1,$cc,$o1); | ||
| 48 | &cmpult($o1,$cc,$cc); | ||
| 49 | &ld(($a3)=&NR(1),&QWPw(3,$ap)); | ||
| 50 | &add($cc,$t1,$cc); &FR($t1); | ||
| 51 | |||
| 52 | ($t2,$o2)=&NR(2); | ||
| 53 | |||
| 54 | &add($a2,$b2,$o2); &FR($a2); | ||
| 55 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 56 | &add($o2,$cc,$o2); | ||
| 57 | &cmpult($o2,$cc,$cc); | ||
| 58 | &ld(($b3)=&NR(1),&QWPw(3,$bp)); | ||
| 59 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 60 | &add($cc,$t2,$cc); &FR($t2); | ||
| 61 | |||
| 62 | ($t3,$o3)=&NR(2); | ||
| 63 | |||
| 64 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 65 | &add($a3,$b3,$o3); &FR($a3); | ||
| 66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 67 | &add($o3,$cc,$o3); | ||
| 68 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 69 | &cmpult($o3,$cc,$cc); | ||
| 70 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 71 | &add($cc,$t3,$cc); &FR($t3); | ||
| 72 | |||
| 73 | |||
| 74 | &sub($count,4,$count); # count-=4 | ||
| 75 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 76 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 77 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 78 | |||
| 79 | ### | ||
| 80 | &bge($count,&label("loop")); | ||
| 81 | ### | ||
| 82 | &br(&label("finish")); | ||
| 83 | ################################################## | ||
| 84 | # Do the last 0..3 words | ||
| 85 | |||
| 86 | ($t0,$o0)=&NR(2); | ||
| 87 | &set_label("last_loop"); | ||
| 88 | |||
| 89 | &ld($a0,&QWPw(0,$ap)); # get a | ||
| 90 | &ld($b0,&QWPw(0,$bp)); # get b | ||
| 91 | &add($ap,$QWS,$ap); | ||
| 92 | &add($bp,$QWS,$bp); | ||
| 93 | &add($a0,$b0,$o0); | ||
| 94 | &sub($count,1,$count); | ||
| 95 | &cmpult($o0,$b0,$t0); # will we borrow? | ||
| 96 | &add($o0,$cc,$o0); # will we borrow? | ||
| 97 | &cmpult($o0,$cc,$cc); # will we borrow? | ||
| 98 | &add($rp,$QWS,$rp); | ||
| 99 | &st($o0,&QWPw(-1,$rp)); # save | ||
| 100 | &add($cc,$t0,$cc); # add the borrows | ||
| 101 | |||
| 102 | ### | ||
| 103 | &bgt($count,&label("last_loop")); | ||
| 104 | &function_end_A($name); | ||
| 105 | |||
| 106 | ###################################################### | ||
| 107 | &set_label("finish"); | ||
| 108 | &add($count,4,$count); | ||
| 109 | &bgt($count,&label("last_loop")); | ||
| 110 | |||
| 111 | &FR($o0,$t0,$a0,$b0); | ||
| 112 | &set_label("end"); | ||
| 113 | &function_end($name); | ||
| 114 | |||
| 115 | &fin_pool; | ||
| 116 | } | ||
| 117 | |||
| 118 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/div.pl b/src/lib/libcrypto/bn/asm/alpha/div.pl new file mode 100644 index 0000000000..e9e680897a --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/div.pl | |||
| @@ -0,0 +1,144 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | |||
| 3 | sub bn_div_words | ||
| 4 | { | ||
| 5 | local($data)=<<'EOF'; | ||
| 6 | # | ||
| 7 | # What follows was taken directly from the C compiler with a few | ||
| 8 | # hacks to redo the lables. | ||
| 9 | # | ||
| 10 | .text | ||
| 11 | .set noreorder | ||
| 12 | .set volatile | ||
| 13 | .align 3 | ||
| 14 | .globl bn_div_words | ||
| 15 | .ent bn_div_words | ||
| 16 | bn_div_words | ||
| 17 | ldgp $29,0($27) | ||
| 18 | bn_div_words.ng: | ||
| 19 | lda $30,-48($30) | ||
| 20 | .frame $30,48,$26,0 | ||
| 21 | stq $26,0($30) | ||
| 22 | stq $9,8($30) | ||
| 23 | stq $10,16($30) | ||
| 24 | stq $11,24($30) | ||
| 25 | stq $12,32($30) | ||
| 26 | stq $13,40($30) | ||
| 27 | .mask 0x4003e00,-48 | ||
| 28 | .prologue 1 | ||
| 29 | bis $16,$16,$9 | ||
| 30 | bis $17,$17,$10 | ||
| 31 | bis $18,$18,$11 | ||
| 32 | bis $31,$31,$13 | ||
| 33 | bis $31,2,$12 | ||
| 34 | bne $11,$9119 | ||
| 35 | lda $0,-1 | ||
| 36 | br $31,$9136 | ||
| 37 | .align 4 | ||
| 38 | $9119: | ||
| 39 | bis $11,$11,$16 | ||
| 40 | jsr $26,BN_num_bits_word | ||
| 41 | ldgp $29,0($26) | ||
| 42 | subq $0,64,$1 | ||
| 43 | beq $1,$9120 | ||
| 44 | bis $31,1,$1 | ||
| 45 | sll $1,$0,$1 | ||
| 46 | cmpule $9,$1,$1 | ||
| 47 | bne $1,$9120 | ||
| 48 | # lda $16,_IO_stderr_ | ||
| 49 | # lda $17,$C32 | ||
| 50 | # bis $0,$0,$18 | ||
| 51 | # jsr $26,fprintf | ||
| 52 | # ldgp $29,0($26) | ||
| 53 | jsr $26,abort | ||
| 54 | ldgp $29,0($26) | ||
| 55 | .align 4 | ||
| 56 | $9120: | ||
| 57 | bis $31,64,$3 | ||
| 58 | cmpult $9,$11,$2 | ||
| 59 | subq $3,$0,$1 | ||
| 60 | addl $1,$31,$0 | ||
| 61 | subq $9,$11,$1 | ||
| 62 | cmoveq $2,$1,$9 | ||
| 63 | beq $0,$9122 | ||
| 64 | zapnot $0,15,$2 | ||
| 65 | subq $3,$0,$1 | ||
| 66 | sll $11,$2,$11 | ||
| 67 | sll $9,$2,$3 | ||
| 68 | srl $10,$1,$1 | ||
| 69 | sll $10,$2,$10 | ||
| 70 | bis $3,$1,$9 | ||
| 71 | $9122: | ||
| 72 | srl $11,32,$5 | ||
| 73 | zapnot $11,15,$6 | ||
| 74 | lda $7,-1 | ||
| 75 | .align 5 | ||
| 76 | $9123: | ||
| 77 | srl $9,32,$1 | ||
| 78 | subq $1,$5,$1 | ||
| 79 | bne $1,$9126 | ||
| 80 | zapnot $7,15,$27 | ||
| 81 | br $31,$9127 | ||
| 82 | .align 4 | ||
| 83 | $9126: | ||
| 84 | bis $9,$9,$24 | ||
| 85 | bis $5,$5,$25 | ||
| 86 | divqu $24,$25,$27 | ||
| 87 | $9127: | ||
| 88 | srl $10,32,$4 | ||
| 89 | .align 5 | ||
| 90 | $9128: | ||
| 91 | mulq $27,$5,$1 | ||
| 92 | subq $9,$1,$3 | ||
| 93 | zapnot $3,240,$1 | ||
| 94 | bne $1,$9129 | ||
| 95 | mulq $6,$27,$2 | ||
| 96 | sll $3,32,$1 | ||
| 97 | addq $1,$4,$1 | ||
| 98 | cmpule $2,$1,$2 | ||
| 99 | bne $2,$9129 | ||
| 100 | subq $27,1,$27 | ||
| 101 | br $31,$9128 | ||
| 102 | .align 4 | ||
| 103 | $9129: | ||
| 104 | mulq $27,$6,$1 | ||
| 105 | mulq $27,$5,$4 | ||
| 106 | srl $1,32,$3 | ||
| 107 | sll $1,32,$1 | ||
| 108 | addq $4,$3,$4 | ||
| 109 | cmpult $10,$1,$2 | ||
| 110 | subq $10,$1,$10 | ||
| 111 | addq $2,$4,$2 | ||
| 112 | cmpult $9,$2,$1 | ||
| 113 | bis $2,$2,$4 | ||
| 114 | beq $1,$9134 | ||
| 115 | addq $9,$11,$9 | ||
| 116 | subq $27,1,$27 | ||
| 117 | $9134: | ||
| 118 | subl $12,1,$12 | ||
| 119 | subq $9,$4,$9 | ||
| 120 | beq $12,$9124 | ||
| 121 | sll $27,32,$13 | ||
| 122 | sll $9,32,$2 | ||
| 123 | srl $10,32,$1 | ||
| 124 | sll $10,32,$10 | ||
| 125 | bis $2,$1,$9 | ||
| 126 | br $31,$9123 | ||
| 127 | .align 4 | ||
| 128 | $9124: | ||
| 129 | bis $13,$27,$0 | ||
| 130 | $9136: | ||
| 131 | ldq $26,0($30) | ||
| 132 | ldq $9,8($30) | ||
| 133 | ldq $10,16($30) | ||
| 134 | ldq $11,24($30) | ||
| 135 | ldq $12,32($30) | ||
| 136 | ldq $13,40($30) | ||
| 137 | addq $30,48,$30 | ||
| 138 | ret $31,($26),1 | ||
| 139 | .end bn_div_words | ||
| 140 | EOF | ||
| 141 | &asm_add($data); | ||
| 142 | } | ||
| 143 | |||
| 144 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul.pl b/src/lib/libcrypto/bn/asm/alpha/mul.pl new file mode 100644 index 0000000000..76c926566c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul.pl | |||
| @@ -0,0 +1,104 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_mul_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | $word=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | ### | ||
| 23 | &blt($count,&label("finish")); | ||
| 24 | |||
| 25 | ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); | ||
| 26 | |||
| 27 | &set_label("loop"); | ||
| 28 | |||
| 29 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 30 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 31 | |||
| 32 | &muh($a0,$word,($h0)=&NR(1)); &FR($a0); | ||
| 33 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 34 | ### wait 8 | ||
| 35 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
| 36 | ### wait 8 | ||
| 37 | &muh($a1,$word,($h1)=&NR(1)); &FR($a1); | ||
| 38 | &add($l0,$cc,$l0); ### wait 8 | ||
| 39 | &mul($a1,$word,($l1)=&NR(1)); &FR($a1); | ||
| 40 | &cmpult($l0,$cc,$cc); ### wait 8 | ||
| 41 | &muh($a2,$word,($h2)=&NR(1)); &FR($a2); | ||
| 42 | &add($h0,$cc,$cc); &FR($h0); ### wait 8 | ||
| 43 | &mul($a2,$word,($l2)=&NR(1)); &FR($a2); | ||
| 44 | &add($l1,$cc,$l1); ### wait 8 | ||
| 45 | &st($l0,&QWPw(0,$rp)); &FR($l0); | ||
| 46 | &cmpult($l1,$cc,$cc); ### wait 8 | ||
| 47 | &muh($a3,$word,($h3)=&NR(1)); &FR($a3); | ||
| 48 | &add($h1,$cc,$cc); &FR($h1); | ||
| 49 | &mul($a3,$word,($l3)=&NR(1)); &FR($a3); | ||
| 50 | &add($l2,$cc,$l2); | ||
| 51 | &st($l1,&QWPw(1,$rp)); &FR($l1); | ||
| 52 | &cmpult($l2,$cc,$cc); | ||
| 53 | &add($h2,$cc,$cc); &FR($h2); | ||
| 54 | &sub($count,4,$count); # count-=4 | ||
| 55 | &st($l2,&QWPw(2,$rp)); &FR($l2); | ||
| 56 | &add($l3,$cc,$l3); | ||
| 57 | &cmpult($l3,$cc,$cc); | ||
| 58 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 59 | &add($h3,$cc,$cc); &FR($h3); | ||
| 60 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 61 | &st($l3,&QWPw(3,$rp)); &FR($l3); | ||
| 62 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 63 | ### | ||
| 64 | &blt($count,&label("finish")); | ||
| 65 | ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); | ||
| 66 | &br(&label("finish")); | ||
| 67 | ################################################## | ||
| 68 | |||
| 69 | ################################################## | ||
| 70 | # Do the last 0..3 words | ||
| 71 | |||
| 72 | &set_label("last_loop"); | ||
| 73 | |||
| 74 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 75 | ### | ||
| 76 | ### | ||
| 77 | ### | ||
| 78 | &muh($a0,$word,($h0)=&NR(1)); | ||
| 79 | ### Wait 8 for next mul issue | ||
| 80 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0) | ||
| 81 | &add($ap,$QWS,$ap); | ||
| 82 | ### Loose 12 until result is available | ||
| 83 | &add($rp,$QWS,$rp); | ||
| 84 | &sub($count,1,$count); | ||
| 85 | &add($l0,$cc,$l0); | ||
| 86 | ### | ||
| 87 | &st($l0,&QWPw(-1,$rp)); &FR($l0); | ||
| 88 | &cmpult($l0,$cc,$cc); | ||
| 89 | &add($h0,$cc,$cc); &FR($h0); | ||
| 90 | &bgt($count,&label("last_loop")); | ||
| 91 | &function_end_A($name); | ||
| 92 | |||
| 93 | ###################################################### | ||
| 94 | &set_label("finish"); | ||
| 95 | &add($count,4,$count); | ||
| 96 | &bgt($count,&label("last_loop")); | ||
| 97 | |||
| 98 | &set_label("end"); | ||
| 99 | &function_end($name); | ||
| 100 | |||
| 101 | &fin_pool; | ||
| 102 | } | ||
| 103 | |||
| 104 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl new file mode 100644 index 0000000000..0d6df69bc4 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl | |||
| @@ -0,0 +1,123 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_mul_add_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | $word=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | ### | ||
| 23 | &blt($count,&label("finish")); | ||
| 24 | |||
| 25 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); | ||
| 26 | |||
| 27 | $a=<<'EOF'; | ||
| 28 | ########################################################## | ||
| 29 | &set_label("loop"); | ||
| 30 | |||
| 31 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); | ||
| 32 | &ld(($a1)=&NR(1),&QWPw(1,$ap)); | ||
| 33 | &muh($a0,$word,($h0)=&NR(1)); | ||
| 34 | &ld(($r1)=&NR(1),&QWPw(1,$rp)); | ||
| 35 | &ld(($a2)=&NR(1),&QWPw(2,$ap)); | ||
| 36 | ### | ||
| 37 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
| 38 | &ld(($r2)=&NR(1),&QWPw(2,$rp)); | ||
| 39 | &muh($a1,$word,($h1)=&NR(1)); | ||
| 40 | &ld(($a3)=&NR(1),&QWPw(3,$ap)); | ||
| 41 | &mul($a1,$word,($l1)=&NR(1)); &FR($a1); | ||
| 42 | &ld(($r3)=&NR(1),&QWPw(3,$rp)); | ||
| 43 | &add($r0,$l0,$r0); | ||
| 44 | &add($r1,$l1,$r1); | ||
| 45 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); | ||
| 46 | &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1); | ||
| 47 | &muh($a2,$word,($h2)=&NR(1)); | ||
| 48 | &add($r0,$cc,$r0); | ||
| 49 | &add($h0,$t0,$h0); &FR($t0); | ||
| 50 | &cmpult($r0,$cc,$cc); | ||
| 51 | &add($h1,$t1,$h1); &FR($t1); | ||
| 52 | &add($h0,$cc,$cc); &FR($h0); | ||
| 53 | &mul($a2,$word,($l2)=&NR(1)); &FR($a2); | ||
| 54 | &add($r1,$cc,$r1); | ||
| 55 | &cmpult($r1,$cc,$cc); | ||
| 56 | &add($r2,$l2,$r2); | ||
| 57 | &add($h1,$cc,$cc); &FR($h1); | ||
| 58 | &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2); | ||
| 59 | &muh($a3,$word,($h3)=&NR(1)); | ||
| 60 | &add($r2,$cc,$r2); | ||
| 61 | &st($r0,&QWPw(0,$rp)); &FR($r0); | ||
| 62 | &add($h2,$t2,$h2); &FR($t2); | ||
| 63 | &st($r1,&QWPw(1,$rp)); &FR($r1); | ||
| 64 | &cmpult($r2,$cc,$cc); | ||
| 65 | &mul($a3,$word,($l3)=&NR(1)); &FR($a3); | ||
| 66 | &add($h2,$cc,$cc); &FR($h2); | ||
| 67 | &st($r2,&QWPw(2,$rp)); &FR($r2); | ||
| 68 | &sub($count,4,$count); # count-=4 | ||
| 69 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 70 | &add($r3,$l3,$r3); | ||
| 71 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 72 | &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3); | ||
| 73 | &add($r3,$cc,$r3); | ||
| 74 | &add($h3,$t3,$h3); &FR($t3); | ||
| 75 | &cmpult($r3,$cc,$cc); | ||
| 76 | &st($r3,&QWPw(-1,$rp)); &FR($r3); | ||
| 77 | &add($h3,$cc,$cc); &FR($h3); | ||
| 78 | |||
| 79 | ### | ||
| 80 | &blt($count,&label("finish")); | ||
| 81 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); | ||
| 82 | &br(&label("loop")); | ||
| 83 | EOF | ||
| 84 | ################################################## | ||
| 85 | # Do the last 0..3 words | ||
| 86 | |||
| 87 | &set_label("last_loop"); | ||
| 88 | |||
| 89 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 90 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b | ||
| 91 | ### | ||
| 92 | ### | ||
| 93 | &muh($a0,$word,($h0)=&NR(1)); &FR($a0); | ||
| 94 | ### wait 8 | ||
| 95 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
| 96 | &add($rp,$QWS,$rp); | ||
| 97 | &add($ap,$QWS,$ap); | ||
| 98 | &sub($count,1,$count); | ||
| 99 | ### wait 3 until l0 is available | ||
| 100 | &add($r0,$l0,$r0); | ||
| 101 | ### | ||
| 102 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); | ||
| 103 | &add($r0,$cc,$r0); | ||
| 104 | &add($h0,$t0,$h0); &FR($t0); | ||
| 105 | &cmpult($r0,$cc,$cc); | ||
| 106 | &add($h0,$cc,$cc); &FR($h0); | ||
| 107 | |||
| 108 | &st($r0,&QWPw(-1,$rp)); &FR($r0); | ||
| 109 | &bgt($count,&label("last_loop")); | ||
| 110 | &function_end_A($name); | ||
| 111 | |||
| 112 | ###################################################### | ||
| 113 | &set_label("finish"); | ||
| 114 | &add($count,4,$count); | ||
| 115 | &bgt($count,&label("last_loop")); | ||
| 116 | |||
| 117 | &set_label("end"); | ||
| 118 | &function_end($name); | ||
| 119 | |||
| 120 | &fin_pool; | ||
| 121 | } | ||
| 122 | |||
| 123 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl new file mode 100644 index 0000000000..9cc876ded4 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl | |||
| @@ -0,0 +1,215 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | # upto | ||
| 5 | |||
| 6 | sub mul_add_c | ||
| 7 | { | ||
| 8 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 9 | local($l1,$h1,$t1,$t2); | ||
| 10 | |||
| 11 | &mul($a,$b,($l1)=&NR(1)); | ||
| 12 | &muh($a,$b,($h1)=&NR(1)); | ||
| 13 | &add($c0,$l1,$c0); | ||
| 14 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); | ||
| 15 | &add($t1,$h1,$h1); &FR($t1); | ||
| 16 | &add($c1,$h1,$c1); | ||
| 17 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); | ||
| 18 | &add($c2,$t2,$c2); &FR($t2); | ||
| 19 | } | ||
| 20 | |||
| 21 | sub bn_mul_comba4 | ||
| 22 | { | ||
| 23 | local($name)=@_; | ||
| 24 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 25 | |||
| 26 | $cnt=1; | ||
| 27 | &init_pool(3); | ||
| 28 | |||
| 29 | $rp=&wparam(0); | ||
| 30 | $ap=&wparam(1); | ||
| 31 | $bp=&wparam(2); | ||
| 32 | |||
| 33 | &function_begin($name,""); | ||
| 34 | |||
| 35 | &comment(""); | ||
| 36 | |||
| 37 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 38 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 39 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 40 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 41 | &mul($a[0],$b[0],($r00)=&NR(1)); | ||
| 42 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 43 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 44 | &muh($a[0],$b[0],($r01)=&NR(1)); | ||
| 45 | &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 46 | &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
| 47 | &mul($a[0],$b[1],($r02)=&NR(1)); | ||
| 48 | |||
| 49 | ($R,$H1,$H2)=&NR(3); | ||
| 50 | |||
| 51 | &st($r00,&QWPw(0,$rp)); &FR($r00); | ||
| 52 | |||
| 53 | &mov("zero",$R); | ||
| 54 | &mul($a[1],$b[0],($r03)=&NR(1)); | ||
| 55 | |||
| 56 | &mov("zero",$H1); | ||
| 57 | &mov("zero",$H0); | ||
| 58 | &add($R,$r01,$R); | ||
| 59 | &muh($a[0],$b[1],($r04)=&NR(1)); | ||
| 60 | &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); | ||
| 61 | &add($R,$r02,$R); | ||
| 62 | &add($H1,$t01,$H1) &FR($t01); | ||
| 63 | &muh($a[1],$b[0],($r05)=&NR(1)); | ||
| 64 | &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); | ||
| 65 | &add($R,$r03,$R); | ||
| 66 | &add($H2,$t02,$H2) &FR($t02); | ||
| 67 | &mul($a[0],$b[2],($r06)=&NR(1)); | ||
| 68 | &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); | ||
| 69 | &add($H1,$t03,$H1) &FR($t03); | ||
| 70 | &st($R,&QWPw(1,$rp)); | ||
| 71 | &add($H1,$H2,$R); | ||
| 72 | |||
| 73 | &mov("zero",$H1); | ||
| 74 | &add($R,$r04,$R); | ||
| 75 | &mov("zero",$H2); | ||
| 76 | &mul($a[1],$b[1],($r07)=&NR(1)); | ||
| 77 | &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); | ||
| 78 | &add($R,$r05,$R); | ||
| 79 | &add($H1,$t04,$H1) &FR($t04); | ||
| 80 | &mul($a[2],$b[0],($r08)=&NR(1)); | ||
| 81 | &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); | ||
| 82 | &add($R,$r01,$R); | ||
| 83 | &add($H2,$t05,$H2) &FR($t05); | ||
| 84 | &muh($a[0],$b[2],($r09)=&NR(1)); | ||
| 85 | &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); | ||
| 86 | &add($R,$r07,$R); | ||
| 87 | &add($H1,$t06,$H1) &FR($t06); | ||
| 88 | &muh($a[1],$b[1],($r10)=&NR(1)); | ||
| 89 | &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); | ||
| 90 | &add($R,$r08,$R); | ||
| 91 | &add($H2,$t07,$H2) &FR($t07); | ||
| 92 | &muh($a[2],$b[0],($r11)=&NR(1)); | ||
| 93 | &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); | ||
| 94 | &add($H1,$t08,$H1) &FR($t08); | ||
| 95 | &st($R,&QWPw(2,$rp)); | ||
| 96 | &add($H1,$H2,$R); | ||
| 97 | |||
| 98 | &mov("zero",$H1); | ||
| 99 | &add($R,$r09,$R); | ||
| 100 | &mov("zero",$H2); | ||
| 101 | &mul($a[0],$b[3],($r12)=&NR(1)); | ||
| 102 | &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); | ||
| 103 | &add($R,$r10,$R); | ||
| 104 | &add($H1,$t09,$H1) &FR($t09); | ||
| 105 | &mul($a[1],$b[2],($r13)=&NR(1)); | ||
| 106 | &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); | ||
| 107 | &add($R,$r11,$R); | ||
| 108 | &add($H1,$t10,$H1) &FR($t10); | ||
| 109 | &mul($a[2],$b[1],($r14)=&NR(1)); | ||
| 110 | &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); | ||
| 111 | &add($R,$r12,$R); | ||
| 112 | &add($H1,$t11,$H1) &FR($t11); | ||
| 113 | &mul($a[3],$b[0],($r15)=&NR(1)); | ||
| 114 | &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); | ||
| 115 | &add($R,$r13,$R); | ||
| 116 | &add($H1,$t12,$H1) &FR($t12); | ||
| 117 | &muh($a[0],$b[3],($r16)=&NR(1)); | ||
| 118 | &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); | ||
| 119 | &add($R,$r14,$R); | ||
| 120 | &add($H1,$t13,$H1) &FR($t13); | ||
| 121 | &muh($a[1],$b[2],($r17)=&NR(1)); | ||
| 122 | &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); | ||
| 123 | &add($R,$r15,$R); | ||
| 124 | &add($H1,$t14,$H1) &FR($t14); | ||
| 125 | &muh($a[2],$b[1],($r18)=&NR(1)); | ||
| 126 | &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); | ||
| 127 | &add($H1,$t15,$H1) &FR($t15); | ||
| 128 | &st($R,&QWPw(3,$rp)); | ||
| 129 | &add($H1,$H2,$R); | ||
| 130 | |||
| 131 | &mov("zero",$H1); | ||
| 132 | &add($R,$r16,$R); | ||
| 133 | &mov("zero",$H2); | ||
| 134 | &muh($a[3],$b[0],($r19)=&NR(1)); | ||
| 135 | &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); | ||
| 136 | &add($R,$r17,$R); | ||
| 137 | &add($H1,$t16,$H1) &FR($t16); | ||
| 138 | &mul($a[1],$b[3],($r20)=&NR(1)); | ||
| 139 | &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); | ||
| 140 | &add($R,$r18,$R); | ||
| 141 | &add($H1,$t17,$H1) &FR($t17); | ||
| 142 | &mul($a[2],$b[2],($r21)=&NR(1)); | ||
| 143 | &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); | ||
| 144 | &add($R,$r19,$R); | ||
| 145 | &add($H1,$t18,$H1) &FR($t18); | ||
| 146 | &mul($a[3],$b[1],($r22)=&NR(1)); | ||
| 147 | &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); | ||
| 148 | &add($R,$r20,$R); | ||
| 149 | &add($H1,$t19,$H1) &FR($t19); | ||
| 150 | &muh($a[1],$b[3],($r23)=&NR(1)); | ||
| 151 | &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); | ||
| 152 | &add($R,$r21,$R); | ||
| 153 | &add($H1,$t20,$H1) &FR($t20); | ||
| 154 | &muh($a[2],$b[2],($r24)=&NR(1)); | ||
| 155 | &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); | ||
| 156 | &add($R,$r22,$R); | ||
| 157 | &add($H1,$t21,$H1) &FR($t21); | ||
| 158 | &muh($a[3],$b[1],($r25)=&NR(1)); | ||
| 159 | &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); | ||
| 160 | &add($H1,$t22,$H1) &FR($t22); | ||
| 161 | &st($R,&QWPw(4,$rp)); | ||
| 162 | &add($H1,$H2,$R); | ||
| 163 | |||
| 164 | &mov("zero",$H1); | ||
| 165 | &add($R,$r23,$R); | ||
| 166 | &mov("zero",$H2); | ||
| 167 | &mul($a[2],$b[3],($r26)=&NR(1)); | ||
| 168 | &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); | ||
| 169 | &add($R,$r24,$R); | ||
| 170 | &add($H1,$t23,$H1) &FR($t23); | ||
| 171 | &mul($a[3],$b[2],($r27)=&NR(1)); | ||
| 172 | &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); | ||
| 173 | &add($R,$r25,$R); | ||
| 174 | &add($H1,$t24,$H1) &FR($t24); | ||
| 175 | &muh($a[2],$b[3],($r28)=&NR(1)); | ||
| 176 | &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); | ||
| 177 | &add($R,$r26,$R); | ||
| 178 | &add($H1,$t25,$H1) &FR($t25); | ||
| 179 | &muh($a[3],$b[2],($r29)=&NR(1)); | ||
| 180 | &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); | ||
| 181 | &add($R,$r27,$R); | ||
| 182 | &add($H1,$t26,$H1) &FR($t26); | ||
| 183 | &mul($a[3],$b[3],($r30)=&NR(1)); | ||
| 184 | &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); | ||
| 185 | &add($H1,$t27,$H1) &FR($t27); | ||
| 186 | &st($R,&QWPw(5,$rp)); | ||
| 187 | &add($H1,$H2,$R); | ||
| 188 | |||
| 189 | &mov("zero",$H1); | ||
| 190 | &add($R,$r28,$R); | ||
| 191 | &mov("zero",$H2); | ||
| 192 | &muh($a[3],$b[3],($r31)=&NR(1)); | ||
| 193 | &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); | ||
| 194 | &add($R,$r29,$R); | ||
| 195 | &add($H1,$t28,$H1) &FR($t28); | ||
| 196 | ############ | ||
| 197 | &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); | ||
| 198 | &add($R,$r30,$R); | ||
| 199 | &add($H1,$t29,$H1) &FR($t29); | ||
| 200 | ############ | ||
| 201 | &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); | ||
| 202 | &add($H1,$t30,$H1) &FR($t30); | ||
| 203 | &st($R,&QWPw(6,$rp)); | ||
| 204 | &add($H1,$H2,$R); | ||
| 205 | |||
| 206 | &add($R,$r31,$R); &FR($r31); | ||
| 207 | &st($R,&QWPw(7,$rp)); | ||
| 208 | |||
| 209 | &FR($R,$H1,$H2); | ||
| 210 | &function_end($name); | ||
| 211 | |||
| 212 | &fin_pool; | ||
| 213 | } | ||
| 214 | |||
| 215 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl new file mode 100644 index 0000000000..79d86dd25c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub mul_add_c | ||
| 5 | { | ||
| 6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | |||
| 9 | print STDERR "count=$cnt\n"; $cnt++; | ||
| 10 | &mul($a,$b,($l1)=&NR(1)); | ||
| 11 | &muh($a,$b,($h1)=&NR(1)); | ||
| 12 | &add($c0,$l1,$c0); | ||
| 13 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); | ||
| 14 | &add($t1,$h1,$h1); &FR($t1); | ||
| 15 | &add($c1,$h1,$c1); | ||
| 16 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); | ||
| 17 | &add($c2,$t2,$c2); &FR($t2); | ||
| 18 | } | ||
| 19 | |||
| 20 | sub bn_mul_comba4 | ||
| 21 | { | ||
| 22 | local($name)=@_; | ||
| 23 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 24 | |||
| 25 | $cnt=1; | ||
| 26 | &init_pool(3); | ||
| 27 | |||
| 28 | $rp=&wparam(0); | ||
| 29 | $ap=&wparam(1); | ||
| 30 | $bp=&wparam(2); | ||
| 31 | |||
| 32 | &function_begin($name,""); | ||
| 33 | |||
| 34 | &comment(""); | ||
| 35 | |||
| 36 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 37 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 38 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 39 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 42 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
| 43 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); | ||
| 44 | |||
| 45 | ($c0,$c1,$c2)=&NR(3); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | &mul($a[0],$b[0],$c0); | ||
| 48 | &muh($a[0],$b[0],$c1); | ||
| 49 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 50 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 51 | &mov("zero",$c2); | ||
| 52 | |||
| 53 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
| 54 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
| 55 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 56 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 57 | &mov("zero",$c2); | ||
| 58 | |||
| 59 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
| 60 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
| 61 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
| 62 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 63 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 64 | &mov("zero",$c2); | ||
| 65 | |||
| 66 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); | ||
| 67 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
| 68 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
| 69 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
| 70 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 71 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 72 | &mov("zero",$c2); | ||
| 73 | |||
| 74 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); | ||
| 75 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
| 76 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
| 77 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 78 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 79 | &mov("zero",$c2); | ||
| 80 | |||
| 81 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); | ||
| 82 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
| 83 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 84 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 85 | &mov("zero",$c2); | ||
| 86 | |||
| 87 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); | ||
| 88 | &st($c0,&QWPw(6,$rp)); | ||
| 89 | &st($c1,&QWPw(7,$rp)); | ||
| 90 | |||
| 91 | &FR($c0,$c1,$c2); | ||
| 92 | |||
| 93 | &function_end($name); | ||
| 94 | |||
| 95 | &fin_pool; | ||
| 96 | } | ||
| 97 | |||
| 98 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl new file mode 100644 index 0000000000..525ca7494b --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl | |||
| @@ -0,0 +1,177 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_mul_comba8 | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 8 | |||
| 9 | $cnt=1; | ||
| 10 | &init_pool(3); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | |||
| 16 | &function_begin($name,""); | ||
| 17 | |||
| 18 | &comment(""); | ||
| 19 | |||
| 20 | &stack_push(2); | ||
| 21 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 22 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 23 | &st($reg_s0,&swtmp(0)); &FR($reg_s0); | ||
| 24 | &st($reg_s1,&swtmp(1)); &FR($reg_s1); | ||
| 25 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 26 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 27 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 28 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 29 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 30 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
| 31 | &ld(($a[4])=&NR(1),&QWPw(1,$ap)); | ||
| 32 | &ld(($b[4])=&NR(1),&QWPw(1,$bp)); | ||
| 33 | &ld(($a[5])=&NR(1),&QWPw(1,$ap)); | ||
| 34 | &ld(($b[5])=&NR(1),&QWPw(1,$bp)); | ||
| 35 | &ld(($a[6])=&NR(1),&QWPw(1,$ap)); | ||
| 36 | &ld(($b[6])=&NR(1),&QWPw(1,$bp)); | ||
| 37 | &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap); | ||
| 38 | &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp); | ||
| 39 | |||
| 40 | ($c0,$c1,$c2)=&NR(3); | ||
| 41 | &mov("zero",$c2); | ||
| 42 | &mul($a[0],$b[0],$c0); | ||
| 43 | &muh($a[0],$b[0],$c1); | ||
| 44 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | |||
| 48 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
| 49 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
| 50 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 52 | &mov("zero",$c2); | ||
| 53 | |||
| 54 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
| 55 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
| 56 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
| 57 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 59 | &mov("zero",$c2); | ||
| 60 | |||
| 61 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); | ||
| 62 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
| 63 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
| 64 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); | ||
| 65 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 66 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 67 | &mov("zero",$c2); | ||
| 68 | |||
| 69 | &mul_add_c($a[0],$b[4],$c0,$c1,$c2); | ||
| 70 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); | ||
| 71 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
| 72 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); | ||
| 73 | &mul_add_c($a[4],$b[0],$c0,$c1,$c2); | ||
| 74 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 75 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 76 | &mov("zero",$c2); | ||
| 77 | |||
| 78 | &mul_add_c($a[0],$b[5],$c0,$c1,$c2); | ||
| 79 | &mul_add_c($a[1],$b[4],$c0,$c1,$c2); | ||
| 80 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); | ||
| 81 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); | ||
| 82 | &mul_add_c($a[4],$b[1],$c0,$c1,$c2); | ||
| 83 | &mul_add_c($a[5],$b[0],$c0,$c1,$c2); | ||
| 84 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 85 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 86 | &mov("zero",$c2); | ||
| 87 | |||
| 88 | &mul_add_c($a[0],$b[6],$c0,$c1,$c2); | ||
| 89 | &mul_add_c($a[1],$b[5],$c0,$c1,$c2); | ||
| 90 | &mul_add_c($a[2],$b[4],$c0,$c1,$c2); | ||
| 91 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); | ||
| 92 | &mul_add_c($a[4],$b[2],$c0,$c1,$c2); | ||
| 93 | &mul_add_c($a[5],$b[1],$c0,$c1,$c2); | ||
| 94 | &mul_add_c($a[6],$b[0],$c0,$c1,$c2); | ||
| 95 | &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 97 | &mov("zero",$c2); | ||
| 98 | |||
| 99 | &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); | ||
| 100 | &mul_add_c($a[1],$b[6],$c0,$c1,$c2); | ||
| 101 | &mul_add_c($a[2],$b[5],$c0,$c1,$c2); | ||
| 102 | &mul_add_c($a[3],$b[4],$c0,$c1,$c2); | ||
| 103 | &mul_add_c($a[4],$b[3],$c0,$c1,$c2); | ||
| 104 | &mul_add_c($a[5],$b[2],$c0,$c1,$c2); | ||
| 105 | &mul_add_c($a[6],$b[1],$c0,$c1,$c2); | ||
| 106 | &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
| 107 | &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 108 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 109 | &mov("zero",$c2); | ||
| 110 | |||
| 111 | &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); | ||
| 112 | &mul_add_c($a[2],$b[6],$c0,$c1,$c2); | ||
| 113 | &mul_add_c($a[3],$b[5],$c0,$c1,$c2); | ||
| 114 | &mul_add_c($a[4],$b[4],$c0,$c1,$c2); | ||
| 115 | &mul_add_c($a[5],$b[3],$c0,$c1,$c2); | ||
| 116 | &mul_add_c($a[6],$b[2],$c0,$c1,$c2); | ||
| 117 | &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
| 118 | &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 119 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 120 | &mov("zero",$c2); | ||
| 121 | |||
| 122 | &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); | ||
| 123 | &mul_add_c($a[3],$b[6],$c0,$c1,$c2); | ||
| 124 | &mul_add_c($a[4],$b[5],$c0,$c1,$c2); | ||
| 125 | &mul_add_c($a[5],$b[4],$c0,$c1,$c2); | ||
| 126 | &mul_add_c($a[6],$b[3],$c0,$c1,$c2); | ||
| 127 | &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
| 128 | &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 129 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 130 | &mov("zero",$c2); | ||
| 131 | |||
| 132 | &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); | ||
| 133 | &mul_add_c($a[4],$b[6],$c0,$c1,$c2); | ||
| 134 | &mul_add_c($a[5],$b[5],$c0,$c1,$c2); | ||
| 135 | &mul_add_c($a[6],$b[4],$c0,$c1,$c2); | ||
| 136 | &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); | ||
| 137 | &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 138 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 139 | &mov("zero",$c2); | ||
| 140 | |||
| 141 | &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); | ||
| 142 | &mul_add_c($a[5],$b[6],$c0,$c1,$c2); | ||
| 143 | &mul_add_c($a[6],$b[5],$c0,$c1,$c2); | ||
| 144 | &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); | ||
| 145 | &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 146 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 147 | &mov("zero",$c2); | ||
| 148 | |||
| 149 | &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); | ||
| 150 | &mul_add_c($a[6],$b[6],$c0,$c1,$c2); | ||
| 151 | &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); | ||
| 152 | &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 153 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 154 | &mov("zero",$c2); | ||
| 155 | |||
| 156 | &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); | ||
| 157 | &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); | ||
| 158 | &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 159 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 160 | &mov("zero",$c2); | ||
| 161 | |||
| 162 | &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); | ||
| 163 | &st($c0,&QWPw(14,$rp)); | ||
| 164 | &st($c1,&QWPw(15,$rp)); | ||
| 165 | |||
| 166 | &FR($c0,$c1,$c2); | ||
| 167 | |||
| 168 | &ld($reg_s0,&swtmp(0)); | ||
| 169 | &ld($reg_s1,&swtmp(1)); | ||
| 170 | &stack_pop(2); | ||
| 171 | |||
| 172 | &function_end($name); | ||
| 173 | |||
| 174 | &fin_pool; | ||
| 175 | } | ||
| 176 | |||
| 177 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr.pl b/src/lib/libcrypto/bn/asm/alpha/sqr.pl new file mode 100644 index 0000000000..a55b696906 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr.pl | |||
| @@ -0,0 +1,113 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_sqr_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); | ||
| 8 | |||
| 9 | &init_pool(3); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | |||
| 16 | &function_begin($name,""); | ||
| 17 | |||
| 18 | &comment(""); | ||
| 19 | &sub($count,4,$count); | ||
| 20 | &mov("zero",$cc); | ||
| 21 | &br(&label("finish")); | ||
| 22 | &blt($count,&label("finish")); | ||
| 23 | |||
| 24 | ($a0,$r0)=&NR(2); | ||
| 25 | &ld($a0,&QWPw(0,$ap)); | ||
| 26 | &ld($r0,&QWPw(0,$rp)); | ||
| 27 | |||
| 28 | $a=<<'EOF'; | ||
| 29 | ########################################################## | ||
| 30 | &set_label("loop"); | ||
| 31 | |||
| 32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 38 | |||
| 39 | ($o0,$t0)=&NR(2); | ||
| 40 | &add($a0,$b0,$o0); | ||
| 41 | &cmpult($o0,$b0,$t0); | ||
| 42 | &add($o0,$cc,$o0); | ||
| 43 | &cmpult($o0,$cc,$cc); | ||
| 44 | &add($cc,$t0,$cc); &FR($t0); | ||
| 45 | |||
| 46 | ($t1,$o1)=&NR(2); | ||
| 47 | |||
| 48 | &add($a1,$b1,$o1); &FR($a1); | ||
| 49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 50 | &add($o1,$cc,$o1); | ||
| 51 | &cmpult($o1,$cc,$cc); | ||
| 52 | &add($cc,$t1,$cc); &FR($t1); | ||
| 53 | |||
| 54 | ($t2,$o2)=&NR(2); | ||
| 55 | |||
| 56 | &add($a2,$b2,$o2); &FR($a2); | ||
| 57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 58 | &add($o2,$cc,$o2); | ||
| 59 | &cmpult($o2,$cc,$cc); | ||
| 60 | &add($cc,$t2,$cc); &FR($t2); | ||
| 61 | |||
| 62 | ($t3,$o3)=&NR(2); | ||
| 63 | |||
| 64 | &add($a3,$b3,$o3); &FR($a3); | ||
| 65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 66 | &add($o3,$cc,$o3); | ||
| 67 | &cmpult($o3,$cc,$cc); | ||
| 68 | &add($cc,$t3,$cc); &FR($t3); | ||
| 69 | |||
| 70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 71 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 72 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 73 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 74 | |||
| 75 | &sub($count,4,$count); # count-=4 | ||
| 76 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 77 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 78 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 79 | |||
| 80 | &blt($count,&label("finish")); | ||
| 81 | &ld($a0,&QWPw(0,$ap)); | ||
| 82 | &ld($b0,&QWPw(0,$bp)); | ||
| 83 | &br(&label("loop")); | ||
| 84 | EOF | ||
| 85 | ################################################## | ||
| 86 | # Do the last 0..3 words | ||
| 87 | |||
| 88 | &set_label("last_loop"); | ||
| 89 | |||
| 90 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 91 | &mul($a0,$a0,($l0)=&NR(1)); | ||
| 92 | &add($ap,$QWS,$ap); | ||
| 93 | &add($rp,2*$QWS,$rp); | ||
| 94 | &sub($count,1,$count); | ||
| 95 | &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); | ||
| 96 | &st($l0,&QWPw(-2,$rp)); &FR($l0); | ||
| 97 | &st($h0,&QWPw(-1,$rp)); &FR($h0); | ||
| 98 | |||
| 99 | &bgt($count,&label("last_loop")); | ||
| 100 | &function_end_A($name); | ||
| 101 | |||
| 102 | ###################################################### | ||
| 103 | &set_label("finish"); | ||
| 104 | &add($count,4,$count); | ||
| 105 | &bgt($count,&label("last_loop")); | ||
| 106 | |||
| 107 | &set_label("end"); | ||
| 108 | &function_end($name); | ||
| 109 | |||
| 110 | &fin_pool; | ||
| 111 | } | ||
| 112 | |||
| 113 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl new file mode 100644 index 0000000000..bf33f5b503 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub sqr_add_c | ||
| 5 | { | ||
| 6 | local($a,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | |||
| 9 | &mul($a,$a,($l1)=&NR(1)); | ||
| 10 | &muh($a,$a,($h1)=&NR(1)); | ||
| 11 | &add($c0,$l1,$c0); | ||
| 12 | &add($c1,$h1,$c1); | ||
| 13 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); | ||
| 14 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); | ||
| 15 | &add($c1,$t1,$c1); &FR($t1); | ||
| 16 | &add($c2,$t2,$c2); &FR($t2); | ||
| 17 | } | ||
| 18 | |||
| 19 | sub sqr_add_c2 | ||
| 20 | { | ||
| 21 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 22 | local($l1,$h1,$t1,$t2); | ||
| 23 | |||
| 24 | &mul($a,$b,($l1)=&NR(1)); | ||
| 25 | &muh($a,$b,($h1)=&NR(1)); | ||
| 26 | &cmplt($l1,"zero",($lc1)=&NR(1)); | ||
| 27 | &cmplt($h1,"zero",($hc1)=&NR(1)); | ||
| 28 | &add($l1,$l1,$l1); | ||
| 29 | &add($h1,$h1,$h1); | ||
| 30 | &add($h1,$lc1,$h1); &FR($lc1); | ||
| 31 | &add($c2,$hc1,$c2); &FR($hc1); | ||
| 32 | |||
| 33 | &add($c0,$l1,$c0); | ||
| 34 | &add($c1,$h1,$c1); | ||
| 35 | &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); | ||
| 36 | &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); | ||
| 37 | |||
| 38 | &add($c1,$lc1,$c1); &FR($lc1); | ||
| 39 | &add($c2,$hc1,$c2); &FR($hc1); | ||
| 40 | } | ||
| 41 | |||
| 42 | |||
| 43 | sub bn_sqr_comba4 | ||
| 44 | { | ||
| 45 | local($name)=@_; | ||
| 46 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 47 | |||
| 48 | $cnt=1; | ||
| 49 | &init_pool(2); | ||
| 50 | |||
| 51 | $rp=&wparam(0); | ||
| 52 | $ap=&wparam(1); | ||
| 53 | |||
| 54 | &function_begin($name,""); | ||
| 55 | |||
| 56 | &comment(""); | ||
| 57 | |||
| 58 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 59 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 60 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 61 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
| 62 | |||
| 63 | ($c0,$c1,$c2)=&NR(3); | ||
| 64 | |||
| 65 | &mov("zero",$c2); | ||
| 66 | &mul($a[0],$a[0],$c0); | ||
| 67 | &muh($a[0],$a[0],$c1); | ||
| 68 | &st($c0,&QWPw(0,$rp)); | ||
| 69 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 70 | &mov("zero",$c2); | ||
| 71 | |||
| 72 | &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); | ||
| 73 | &st($c0,&QWPw(1,$rp)); | ||
| 74 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 75 | &mov("zero",$c2); | ||
| 76 | |||
| 77 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
| 78 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
| 79 | &st($c0,&QWPw(2,$rp)); | ||
| 80 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 81 | &mov("zero",$c2); | ||
| 82 | |||
| 83 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
| 84 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
| 85 | &st($c0,&QWPw(3,$rp)); | ||
| 86 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 87 | &mov("zero",$c2); | ||
| 88 | |||
| 89 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
| 90 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
| 91 | &st($c0,&QWPw(4,$rp)); | ||
| 92 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 93 | &mov("zero",$c2); | ||
| 94 | |||
| 95 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
| 96 | &st($c0,&QWPw(5,$rp)); | ||
| 97 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 98 | &mov("zero",$c2); | ||
| 99 | |||
| 100 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
| 101 | &st($c0,&QWPw(6,$rp)); | ||
| 102 | &st($c1,&QWPw(7,$rp)); | ||
| 103 | |||
| 104 | &function_end($name); | ||
| 105 | |||
| 106 | &fin_pool; | ||
| 107 | } | ||
| 108 | |||
| 109 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl new file mode 100644 index 0000000000..b4afe085f1 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl | |||
| @@ -0,0 +1,132 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_sqr_comba8 | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 8 | |||
| 9 | $cnt=1; | ||
| 10 | &init_pool(2); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | |||
| 15 | &function_begin($name,""); | ||
| 16 | |||
| 17 | &comment(""); | ||
| 18 | |||
| 19 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 20 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 21 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 22 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 23 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); | ||
| 24 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
| 25 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
| 26 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
| 27 | |||
| 28 | ($c0,$c1,$c2)=&NR(3); | ||
| 29 | |||
| 30 | &mov("zero",$c2); | ||
| 31 | &mul($a[0],$a[0],$c0); | ||
| 32 | &muh($a[0],$a[0],$c1); | ||
| 33 | &st($c0,&QWPw(0,$rp)); | ||
| 34 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 35 | &mov("zero",$c2); | ||
| 36 | |||
| 37 | &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); | ||
| 38 | &st($c0,&QWPw(1,$rp)); | ||
| 39 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 40 | &mov("zero",$c2); | ||
| 41 | |||
| 42 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
| 43 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
| 44 | &st($c0,&QWPw(2,$rp)); | ||
| 45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | |||
| 48 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
| 49 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
| 50 | &st($c0,&QWPw(3,$rp)); | ||
| 51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 52 | &mov("zero",$c2); | ||
| 53 | |||
| 54 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
| 55 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
| 56 | &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); | ||
| 57 | &st($c0,&QWPw(4,$rp)); | ||
| 58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 59 | &mov("zero",$c2); | ||
| 60 | |||
| 61 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
| 62 | &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); | ||
| 63 | &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); | ||
| 64 | &st($c0,&QWPw(5,$rp)); | ||
| 65 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 66 | &mov("zero",$c2); | ||
| 67 | |||
| 68 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
| 69 | &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); | ||
| 70 | &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); | ||
| 71 | &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); | ||
| 72 | &st($c0,&QWPw(6,$rp)); | ||
| 73 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 74 | &mov("zero",$c2); | ||
| 75 | |||
| 76 | &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); | ||
| 77 | &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); | ||
| 78 | &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); | ||
| 79 | &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); | ||
| 80 | &st($c0,&QWPw(7,$rp)); | ||
| 81 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 82 | &mov("zero",$c2); | ||
| 83 | |||
| 84 | &sqr_add_c($a[4],$c0,$c1,$c2); | ||
| 85 | &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); | ||
| 86 | &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); | ||
| 87 | &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); | ||
| 88 | &st($c0,&QWPw(8,$rp)); | ||
| 89 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 90 | &mov("zero",$c2); | ||
| 91 | |||
| 92 | &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); | ||
| 93 | &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); | ||
| 94 | &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); | ||
| 95 | &st($c0,&QWPw(9,$rp)); | ||
| 96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 97 | &mov("zero",$c2); | ||
| 98 | |||
| 99 | &sqr_add_c($a[5],$c0,$c1,$c2); | ||
| 100 | &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); | ||
| 101 | &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); | ||
| 102 | &st($c0,&QWPw(10,$rp)); | ||
| 103 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 104 | &mov("zero",$c2); | ||
| 105 | |||
| 106 | &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); | ||
| 107 | &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); | ||
| 108 | &st($c0,&QWPw(11,$rp)); | ||
| 109 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 110 | &mov("zero",$c2); | ||
| 111 | |||
| 112 | &sqr_add_c($a[6],$c0,$c1,$c2); | ||
| 113 | &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); | ||
| 114 | &st($c0,&QWPw(12,$rp)); | ||
| 115 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 116 | &mov("zero",$c2); | ||
| 117 | |||
| 118 | &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); | ||
| 119 | &st($c0,&QWPw(13,$rp)); | ||
| 120 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 121 | &mov("zero",$c2); | ||
| 122 | |||
| 123 | &sqr_add_c($a[7],$c0,$c1,$c2); | ||
| 124 | &st($c0,&QWPw(14,$rp)); | ||
| 125 | &st($c1,&QWPw(15,$rp)); | ||
| 126 | |||
| 127 | &function_end($name); | ||
| 128 | |||
| 129 | &fin_pool; | ||
| 130 | } | ||
| 131 | |||
| 132 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sub.pl b/src/lib/libcrypto/bn/asm/alpha/sub.pl new file mode 100644 index 0000000000..d998da5c21 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sub.pl | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assember | ||
| 3 | |||
| 4 | sub bn_sub_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | $count=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | &blt($count,&label("finish")); | ||
| 23 | |||
| 24 | ($a0,$b0)=&NR(2); | ||
| 25 | &ld($a0,&QWPw(0,$ap)); | ||
| 26 | &ld($b0,&QWPw(0,$bp)); | ||
| 27 | |||
| 28 | ########################################################## | ||
| 29 | &set_label("loop"); | ||
| 30 | |||
| 31 | ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); | ||
| 32 | &ld($a1,&QWPw(1,$ap)); | ||
| 33 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
| 34 | &ld($b1,&QWPw(1,$bp)); | ||
| 35 | &sub($a0,$b0,$a0); # do the subtract | ||
| 36 | &ld($a2,&QWPw(2,$ap)); | ||
| 37 | &cmpult($a0,$cc,$b0); # will we borrow? | ||
| 38 | &ld($b2,&QWPw(2,$bp)); | ||
| 39 | &sub($a0,$cc,$o0); # will we borrow? | ||
| 40 | &ld($a3,&QWPw(3,$ap)); | ||
| 41 | &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); | ||
| 42 | |||
| 43 | &cmpult($a1,$b1,$t1); # will we borrow? | ||
| 44 | &sub($a1,$b1,$a1); # do the subtract | ||
| 45 | &ld($b3,&QWPw(3,$bp)); | ||
| 46 | &cmpult($a1,$cc,$b1); # will we borrow? | ||
| 47 | &sub($a1,$cc,$o1); # will we borrow? | ||
| 48 | &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); | ||
| 49 | |||
| 50 | &cmpult($a2,$b2,$tmp); # will we borrow? | ||
| 51 | &sub($a2,$b2,$a2); # do the subtract | ||
| 52 | &st($o0,&QWPw(0,$rp)); &FR($o0); # save | ||
| 53 | &cmpult($a2,$cc,$b2); # will we borrow? | ||
| 54 | &sub($a2,$cc,$o2); # will we borrow? | ||
| 55 | &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); | ||
| 56 | |||
| 57 | &cmpult($a3,$b3,$t3); # will we borrow? | ||
| 58 | &sub($a3,$b3,$a3); # do the subtract | ||
| 59 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
| 60 | &cmpult($a3,$cc,$b3); # will we borrow? | ||
| 61 | &sub($a3,$cc,$o3); # will we borrow? | ||
| 62 | &add($b3,$t3,$cc); &FR($t3,$a3,$b3); | ||
| 63 | |||
| 64 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
| 65 | &sub($count,4,$count); # count-=4 | ||
| 66 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
| 67 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 68 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 69 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 70 | |||
| 71 | &blt($count,&label("finish")); | ||
| 72 | &ld($a0,&QWPw(0,$ap)); | ||
| 73 | &ld($b0,&QWPw(0,$bp)); | ||
| 74 | &br(&label("loop")); | ||
| 75 | ################################################## | ||
| 76 | # Do the last 0..3 words | ||
| 77 | |||
| 78 | &set_label("last_loop"); | ||
| 79 | |||
| 80 | &ld($a0,&QWPw(0,$ap)); # get a | ||
| 81 | &ld($b0,&QWPw(0,$bp)); # get b | ||
| 82 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
| 83 | &sub($a0,$b0,$a0); # do the subtract | ||
| 84 | &cmpult($a0,$cc,$b0); # will we borrow? | ||
| 85 | &sub($a0,$cc,$a0); # will we borrow? | ||
| 86 | &st($a0,&QWPw(0,$rp)); # save | ||
| 87 | &add($b0,$tmp,$cc); # add the borrows | ||
| 88 | |||
| 89 | &add($ap,$QWS,$ap); | ||
| 90 | &add($bp,$QWS,$bp); | ||
| 91 | &add($rp,$QWS,$rp); | ||
| 92 | &sub($count,1,$count); | ||
| 93 | &bgt($count,&label("last_loop")); | ||
| 94 | &function_end_A($name); | ||
| 95 | |||
| 96 | ###################################################### | ||
| 97 | &set_label("finish"); | ||
| 98 | &add($count,4,$count); | ||
| 99 | &bgt($count,&label("last_loop")); | ||
| 100 | |||
| 101 | &FR($a0,$b0); | ||
| 102 | &set_label("end"); | ||
| 103 | &function_end($name); | ||
| 104 | |||
| 105 | &fin_pool; | ||
| 106 | } | ||
| 107 | |||
| 108 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/add.pl b/src/lib/libcrypto/bn/asm/x86/add.pl new file mode 100644 index 0000000000..0b5cf583e3 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/add.pl | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assember | ||
| 3 | |||
| 4 | sub bn_add_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | |||
| 10 | &comment(""); | ||
| 11 | $a="esi"; | ||
| 12 | $b="edi"; | ||
| 13 | $c="eax"; | ||
| 14 | $r="ebx"; | ||
| 15 | $tmp1="ecx"; | ||
| 16 | $tmp2="edx"; | ||
| 17 | $num="ebp"; | ||
| 18 | |||
| 19 | &mov($r,&wparam(0)); # get r | ||
| 20 | &mov($a,&wparam(1)); # get a | ||
| 21 | &mov($b,&wparam(2)); # get b | ||
| 22 | &mov($num,&wparam(3)); # get num | ||
| 23 | &xor($c,$c); # clear carry | ||
| 24 | &and($num,0xfffffff8); # num / 8 | ||
| 25 | |||
| 26 | &jz(&label("aw_finish")); | ||
| 27 | |||
| 28 | &set_label("aw_loop",0); | ||
| 29 | for ($i=0; $i<8; $i++) | ||
| 30 | { | ||
| 31 | &comment("Round $i"); | ||
| 32 | |||
| 33 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
| 34 | &mov($tmp2,&DWP($i*4,$b,"",0)); # *b | ||
| 35 | &add($tmp1,$c); | ||
| 36 | &mov($c,0); | ||
| 37 | &adc($c,$c); | ||
| 38 | &add($tmp1,$tmp2); | ||
| 39 | &adc($c,0); | ||
| 40 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *r | ||
| 41 | } | ||
| 42 | |||
| 43 | &comment(""); | ||
| 44 | &add($a,32); | ||
| 45 | &add($b,32); | ||
| 46 | &add($r,32); | ||
| 47 | &sub($num,8); | ||
| 48 | &jnz(&label("aw_loop")); | ||
| 49 | |||
| 50 | &set_label("aw_finish",0); | ||
| 51 | &mov($num,&wparam(3)); # get num | ||
| 52 | &and($num,7); | ||
| 53 | &jz(&label("aw_end")); | ||
| 54 | |||
| 55 | for ($i=0; $i<7; $i++) | ||
| 56 | { | ||
| 57 | &comment("Tail Round $i"); | ||
| 58 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
| 59 | &mov($tmp2,&DWP($i*4,$b,"",0));# *b | ||
| 60 | &add($tmp1,$c); | ||
| 61 | &mov($c,0); | ||
| 62 | &adc($c,$c); | ||
| 63 | &add($tmp1,$tmp2); | ||
| 64 | &adc($c,0); | ||
| 65 | &dec($num) if ($i != 6); | ||
| 66 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *a | ||
| 67 | &jz(&label("aw_end")) if ($i != 6); | ||
| 68 | } | ||
| 69 | &set_label("aw_end",0); | ||
| 70 | |||
| 71 | # &mov("eax",$c); # $c is "eax" | ||
| 72 | |||
| 73 | &function_end($name); | ||
| 74 | } | ||
| 75 | |||
| 76 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl new file mode 100644 index 0000000000..2291253629 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/comba.pl | |||
| @@ -0,0 +1,277 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assember | ||
| 3 | |||
| 4 | sub mul_add_c | ||
| 5 | { | ||
| 6 | local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; | ||
| 7 | |||
| 8 | # pos == -1 if eax and edx are pre-loaded, 0 to load from next | ||
| 9 | # words, and 1 if load return value | ||
| 10 | |||
| 11 | &comment("mul a[$ai]*b[$bi]"); | ||
| 12 | |||
| 13 | # "eax" and "edx" will always be pre-loaded. | ||
| 14 | # &mov("eax",&DWP($ai*4,$a,"",0)) ; | ||
| 15 | # &mov("edx",&DWP($bi*4,$b,"",0)); | ||
| 16 | |||
| 17 | &mul("edx"); | ||
| 18 | &add($c0,"eax"); | ||
| 19 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a | ||
| 20 | &mov("eax",&wparam(0)) if $pos > 0; # load r[] | ||
| 21 | ### | ||
| 22 | &adc($c1,"edx"); | ||
| 23 | &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b | ||
| 24 | &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b | ||
| 25 | ### | ||
| 26 | &adc($c2,0); | ||
| 27 | # is pos > 1, it means it is the last loop | ||
| 28 | &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; | ||
| 29 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a | ||
| 30 | } | ||
| 31 | |||
| 32 | sub sqr_add_c | ||
| 33 | { | ||
| 34 | local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; | ||
| 35 | |||
| 36 | # pos == -1 if eax and edx are pre-loaded, 0 to load from next | ||
| 37 | # words, and 1 if load return value | ||
| 38 | |||
| 39 | &comment("sqr a[$ai]*a[$bi]"); | ||
| 40 | |||
| 41 | # "eax" and "edx" will always be pre-loaded. | ||
| 42 | # &mov("eax",&DWP($ai*4,$a,"",0)) ; | ||
| 43 | # &mov("edx",&DWP($bi*4,$b,"",0)); | ||
| 44 | |||
| 45 | if ($ai == $bi) | ||
| 46 | { &mul("eax");} | ||
| 47 | else | ||
| 48 | { &mul("edx");} | ||
| 49 | &add($c0,"eax"); | ||
| 50 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a | ||
| 51 | ### | ||
| 52 | &adc($c1,"edx"); | ||
| 53 | &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); | ||
| 54 | ### | ||
| 55 | &adc($c2,0); | ||
| 56 | # is pos > 1, it means it is the last loop | ||
| 57 | &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; | ||
| 58 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b | ||
| 59 | } | ||
| 60 | |||
| 61 | sub sqr_add_c2 | ||
| 62 | { | ||
| 63 | local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; | ||
| 64 | |||
| 65 | # pos == -1 if eax and edx are pre-loaded, 0 to load from next | ||
| 66 | # words, and 1 if load return value | ||
| 67 | |||
| 68 | &comment("sqr a[$ai]*a[$bi]"); | ||
| 69 | |||
| 70 | # "eax" and "edx" will always be pre-loaded. | ||
| 71 | # &mov("eax",&DWP($ai*4,$a,"",0)) ; | ||
| 72 | # &mov("edx",&DWP($bi*4,$a,"",0)); | ||
| 73 | |||
| 74 | if ($ai == $bi) | ||
| 75 | { &mul("eax");} | ||
| 76 | else | ||
| 77 | { &mul("edx");} | ||
| 78 | &add("eax","eax"); | ||
| 79 | ### | ||
| 80 | &adc("edx","edx"); | ||
| 81 | ### | ||
| 82 | &adc($c2,0); | ||
| 83 | &add($c0,"eax"); | ||
| 84 | &adc($c1,"edx"); | ||
| 85 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a | ||
| 86 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b | ||
| 87 | &adc($c2,0); | ||
| 88 | &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; | ||
| 89 | &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb); | ||
| 90 | ### | ||
| 91 | } | ||
| 92 | |||
| 93 | sub bn_mul_comba | ||
| 94 | { | ||
| 95 | local($name,$num)=@_; | ||
| 96 | local($a,$b,$c0,$c1,$c2); | ||
| 97 | local($i,$as,$ae,$bs,$be,$ai,$bi); | ||
| 98 | local($tot,$end); | ||
| 99 | |||
| 100 | &function_begin_B($name,""); | ||
| 101 | |||
| 102 | $c0="ebx"; | ||
| 103 | $c1="ecx"; | ||
| 104 | $c2="ebp"; | ||
| 105 | $a="esi"; | ||
| 106 | $b="edi"; | ||
| 107 | |||
| 108 | $as=0; | ||
| 109 | $ae=0; | ||
| 110 | $bs=0; | ||
| 111 | $be=0; | ||
| 112 | $tot=$num+$num-1; | ||
| 113 | |||
| 114 | &push("esi"); | ||
| 115 | &mov($a,&wparam(1)); | ||
| 116 | &push("edi"); | ||
| 117 | &mov($b,&wparam(2)); | ||
| 118 | &push("ebp"); | ||
| 119 | &push("ebx"); | ||
| 120 | |||
| 121 | &xor($c0,$c0); | ||
| 122 | &mov("eax",&DWP(0,$a,"",0)); # load the first word | ||
| 123 | &xor($c1,$c1); | ||
| 124 | &mov("edx",&DWP(0,$b,"",0)); # load the first second | ||
| 125 | |||
| 126 | for ($i=0; $i<$tot; $i++) | ||
| 127 | { | ||
| 128 | $ai=$as; | ||
| 129 | $bi=$bs; | ||
| 130 | $end=$be+1; | ||
| 131 | |||
| 132 | &comment("################## Calculate word $i"); | ||
| 133 | |||
| 134 | for ($j=$bs; $j<$end; $j++) | ||
| 135 | { | ||
| 136 | &xor($c2,$c2) if ($j == $bs); | ||
| 137 | if (($j+1) == $end) | ||
| 138 | { | ||
| 139 | $v=1; | ||
| 140 | $v=2 if (($i+1) == $tot); | ||
| 141 | } | ||
| 142 | else | ||
| 143 | { $v=0; } | ||
| 144 | if (($j+1) != $end) | ||
| 145 | { | ||
| 146 | $na=($ai-1); | ||
| 147 | $nb=($bi+1); | ||
| 148 | } | ||
| 149 | else | ||
| 150 | { | ||
| 151 | $na=$as+($i < ($num-1)); | ||
| 152 | $nb=$bs+($i >= ($num-1)); | ||
| 153 | } | ||
| 154 | #printf STDERR "[$ai,$bi] -> [$na,$nb]\n"; | ||
| 155 | &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb); | ||
| 156 | if ($v) | ||
| 157 | { | ||
| 158 | &comment("saved r[$i]"); | ||
| 159 | # &mov("eax",&wparam(0)); | ||
| 160 | # &mov(&DWP($i*4,"eax","",0),$c0); | ||
| 161 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 162 | } | ||
| 163 | $ai--; | ||
| 164 | $bi++; | ||
| 165 | } | ||
| 166 | $as++ if ($i < ($num-1)); | ||
| 167 | $ae++ if ($i >= ($num-1)); | ||
| 168 | |||
| 169 | $bs++ if ($i >= ($num-1)); | ||
| 170 | $be++ if ($i < ($num-1)); | ||
| 171 | } | ||
| 172 | &comment("save r[$i]"); | ||
| 173 | # &mov("eax",&wparam(0)); | ||
| 174 | &mov(&DWP($i*4,"eax","",0),$c0); | ||
| 175 | |||
| 176 | &pop("ebx"); | ||
| 177 | &pop("ebp"); | ||
| 178 | &pop("edi"); | ||
| 179 | &pop("esi"); | ||
| 180 | &ret(); | ||
| 181 | &function_end_B($name); | ||
| 182 | } | ||
| 183 | |||
| 184 | sub bn_sqr_comba | ||
| 185 | { | ||
| 186 | local($name,$num)=@_; | ||
| 187 | local($r,$a,$c0,$c1,$c2)=@_; | ||
| 188 | local($i,$as,$ae,$bs,$be,$ai,$bi); | ||
| 189 | local($b,$tot,$end,$half); | ||
| 190 | |||
| 191 | &function_begin_B($name,""); | ||
| 192 | |||
| 193 | $c0="ebx"; | ||
| 194 | $c1="ecx"; | ||
| 195 | $c2="ebp"; | ||
| 196 | $a="esi"; | ||
| 197 | $r="edi"; | ||
| 198 | |||
| 199 | &push("esi"); | ||
| 200 | &push("edi"); | ||
| 201 | &push("ebp"); | ||
| 202 | &push("ebx"); | ||
| 203 | &mov($r,&wparam(0)); | ||
| 204 | &mov($a,&wparam(1)); | ||
| 205 | &xor($c0,$c0); | ||
| 206 | &xor($c1,$c1); | ||
| 207 | &mov("eax",&DWP(0,$a,"",0)); # load the first word | ||
| 208 | |||
| 209 | $as=0; | ||
| 210 | $ae=0; | ||
| 211 | $bs=0; | ||
| 212 | $be=0; | ||
| 213 | $tot=$num+$num-1; | ||
| 214 | |||
| 215 | for ($i=0; $i<$tot; $i++) | ||
| 216 | { | ||
| 217 | $ai=$as; | ||
| 218 | $bi=$bs; | ||
| 219 | $end=$be+1; | ||
| 220 | |||
| 221 | &comment("############### Calculate word $i"); | ||
| 222 | for ($j=$bs; $j<$end; $j++) | ||
| 223 | { | ||
| 224 | &xor($c2,$c2) if ($j == $bs); | ||
| 225 | if (($ai-1) < ($bi+1)) | ||
| 226 | { | ||
| 227 | $v=1; | ||
| 228 | $v=2 if ($i+1) == $tot; | ||
| 229 | } | ||
| 230 | else | ||
| 231 | { $v=0; } | ||
| 232 | if (!$v) | ||
| 233 | { | ||
| 234 | $na=$ai-1; | ||
| 235 | $nb=$bi+1; | ||
| 236 | } | ||
| 237 | else | ||
| 238 | { | ||
| 239 | $na=$as+($i < ($num-1)); | ||
| 240 | $nb=$bs+($i >= ($num-1)); | ||
| 241 | } | ||
| 242 | if ($ai == $bi) | ||
| 243 | { | ||
| 244 | &sqr_add_c($r,$a,$ai,$bi, | ||
| 245 | $c0,$c1,$c2,$v,$i,$na,$nb); | ||
| 246 | } | ||
| 247 | else | ||
| 248 | { | ||
| 249 | &sqr_add_c2($r,$a,$ai,$bi, | ||
| 250 | $c0,$c1,$c2,$v,$i,$na,$nb); | ||
| 251 | } | ||
| 252 | if ($v) | ||
| 253 | { | ||
| 254 | &comment("saved r[$i]"); | ||
| 255 | #&mov(&DWP($i*4,$r,"",0),$c0); | ||
| 256 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 257 | last; | ||
| 258 | } | ||
| 259 | $ai--; | ||
| 260 | $bi++; | ||
| 261 | } | ||
| 262 | $as++ if ($i < ($num-1)); | ||
| 263 | $ae++ if ($i >= ($num-1)); | ||
| 264 | |||
| 265 | $bs++ if ($i >= ($num-1)); | ||
| 266 | $be++ if ($i < ($num-1)); | ||
| 267 | } | ||
| 268 | &mov(&DWP($i*4,$r,"",0),$c0); | ||
| 269 | &pop("ebx"); | ||
| 270 | &pop("ebp"); | ||
| 271 | &pop("edi"); | ||
| 272 | &pop("esi"); | ||
| 273 | &ret(); | ||
| 274 | &function_end_B($name); | ||
| 275 | } | ||
| 276 | |||
| 277 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/div.pl b/src/lib/libcrypto/bn/asm/x86/div.pl new file mode 100644 index 0000000000..0e90152caa --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/div.pl | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assember | ||
| 3 | |||
| 4 | sub bn_div_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | &mov("edx",&wparam(0)); # | ||
| 10 | &mov("eax",&wparam(1)); # | ||
| 11 | &mov("ebx",&wparam(2)); # | ||
| 12 | &div("ebx"); | ||
| 13 | &function_end($name); | ||
| 14 | } | ||
| 15 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/f b/src/lib/libcrypto/bn/asm/x86/f new file mode 100644 index 0000000000..22e4112224 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/f | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assember | ||
| 3 | |||
diff --git a/src/lib/libcrypto/bn/asm/x86/mul.pl b/src/lib/libcrypto/bn/asm/x86/mul.pl new file mode 100644 index 0000000000..674cb9b055 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/mul.pl | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assember | ||
| 3 | |||
| 4 | sub bn_mul_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | |||
| 10 | &comment(""); | ||
| 11 | $Low="eax"; | ||
| 12 | $High="edx"; | ||
| 13 | $a="ebx"; | ||
| 14 | $w="ecx"; | ||
| 15 | $r="edi"; | ||
| 16 | $c="esi"; | ||
| 17 | $num="ebp"; | ||
| 18 | |||
| 19 | &xor($c,$c); # clear carry | ||
| 20 | &mov($r,&wparam(0)); # | ||
| 21 | &mov($a,&wparam(1)); # | ||
| 22 | &mov($num,&wparam(2)); # | ||
| 23 | &mov($w,&wparam(3)); # | ||
| 24 | |||
| 25 | &and($num,0xfffffff8); # num / 8 | ||
| 26 | &jz(&label("mw_finish")); | ||
| 27 | |||
| 28 | &set_label("mw_loop",0); | ||
| 29 | for ($i=0; $i<32; $i+=4) | ||
| 30 | { | ||
| 31 | &comment("Round $i"); | ||
| 32 | |||
| 33 | &mov("eax",&DWP($i,$a,"",0)); # *a | ||
| 34 | &mul($w); # *a * w | ||
| 35 | &add("eax",$c); # L(t)+=c | ||
| 36 | # XXX | ||
| 37 | |||
| 38 | &adc("edx",0); # H(t)+=carry | ||
| 39 | &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); | ||
| 40 | |||
| 41 | &mov($c,"edx"); # c= H(t); | ||
| 42 | } | ||
| 43 | |||
| 44 | &comment(""); | ||
| 45 | &add($a,32); | ||
| 46 | &add($r,32); | ||
| 47 | &sub($num,8); | ||
| 48 | &jz(&label("mw_finish")); | ||
| 49 | &jmp(&label("mw_loop")); | ||
| 50 | |||
| 51 | &set_label("mw_finish",0); | ||
| 52 | &mov($num,&wparam(2)); # get num | ||
| 53 | &and($num,7); | ||
| 54 | &jnz(&label("mw_finish2")); | ||
| 55 | &jmp(&label("mw_end")); | ||
| 56 | |||
| 57 | &set_label("mw_finish2",1); | ||
| 58 | for ($i=0; $i<7; $i++) | ||
| 59 | { | ||
| 60 | &comment("Tail Round $i"); | ||
| 61 | &mov("eax",&DWP($i*4,$a,"",0));# *a | ||
| 62 | &mul($w); # *a * w | ||
| 63 | &add("eax",$c); # L(t)+=c | ||
| 64 | # XXX | ||
| 65 | &adc("edx",0); # H(t)+=carry | ||
| 66 | &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t); | ||
| 67 | &mov($c,"edx"); # c= H(t); | ||
| 68 | &dec($num) if ($i != 7-1); | ||
| 69 | &jz(&label("mw_end")) if ($i != 7-1); | ||
| 70 | } | ||
| 71 | &set_label("mw_end",0); | ||
| 72 | &mov("eax",$c); | ||
| 73 | |||
| 74 | &function_end($name); | ||
| 75 | } | ||
| 76 | |||
| 77 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/mul_add.pl b/src/lib/libcrypto/bn/asm/x86/mul_add.pl new file mode 100644 index 0000000000..61830d3a90 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/mul_add.pl | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assember | ||
| 3 | |||
| 4 | sub bn_mul_add_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | |||
| 10 | &comment(""); | ||
| 11 | $Low="eax"; | ||
| 12 | $High="edx"; | ||
| 13 | $a="ebx"; | ||
| 14 | $w="ebp"; | ||
| 15 | $r="edi"; | ||
| 16 | $c="esi"; | ||
| 17 | |||
| 18 | &xor($c,$c); # clear carry | ||
| 19 | &mov($r,&wparam(0)); # | ||
| 20 | |||
| 21 | &mov("ecx",&wparam(2)); # | ||
| 22 | &mov($a,&wparam(1)); # | ||
| 23 | |||
| 24 | &and("ecx",0xfffffff8); # num / 8 | ||
| 25 | &mov($w,&wparam(3)); # | ||
| 26 | |||
| 27 | &push("ecx"); # Up the stack for a tmp variable | ||
| 28 | |||
| 29 | &jz(&label("maw_finish")); | ||
| 30 | |||
| 31 | &set_label("maw_loop",0); | ||
| 32 | |||
| 33 | &mov(&swtmp(0),"ecx"); # | ||
| 34 | |||
| 35 | for ($i=0; $i<32; $i+=4) | ||
| 36 | { | ||
| 37 | &comment("Round $i"); | ||
| 38 | |||
| 39 | &mov("eax",&DWP($i,$a,"",0)); # *a | ||
| 40 | &mul($w); # *a * w | ||
| 41 | &add("eax",$c); # L(t)+= *r | ||
| 42 | &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r | ||
| 43 | &adc("edx",0); # H(t)+=carry | ||
| 44 | &add("eax",$c); # L(t)+=c | ||
| 45 | &adc("edx",0); # H(t)+=carry | ||
| 46 | &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); | ||
| 47 | &mov($c,"edx"); # c= H(t); | ||
| 48 | } | ||
| 49 | |||
| 50 | &comment(""); | ||
| 51 | &mov("ecx",&swtmp(0)); # | ||
| 52 | &add($a,32); | ||
| 53 | &add($r,32); | ||
| 54 | &sub("ecx",8); | ||
| 55 | &jnz(&label("maw_loop")); | ||
| 56 | |||
| 57 | &set_label("maw_finish",0); | ||
| 58 | &mov("ecx",&wparam(2)); # get num | ||
| 59 | &and("ecx",7); | ||
| 60 | &jnz(&label("maw_finish2")); # helps branch prediction | ||
| 61 | &jmp(&label("maw_end")); | ||
| 62 | |||
| 63 | &set_label("maw_finish2",1); | ||
| 64 | for ($i=0; $i<7; $i++) | ||
| 65 | { | ||
| 66 | &comment("Tail Round $i"); | ||
| 67 | &mov("eax",&DWP($i*4,$a,"",0));# *a | ||
| 68 | &mul($w); # *a * w | ||
| 69 | &add("eax",$c); # L(t)+=c | ||
| 70 | &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r | ||
| 71 | &adc("edx",0); # H(t)+=carry | ||
| 72 | &add("eax",$c); | ||
| 73 | &adc("edx",0); # H(t)+=carry | ||
| 74 | &dec("ecx") if ($i != 7-1); | ||
| 75 | &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); | ||
| 76 | &mov($c,"edx"); # c= H(t); | ||
| 77 | &jz(&label("maw_end")) if ($i != 7-1); | ||
| 78 | } | ||
| 79 | &set_label("maw_end",0); | ||
| 80 | &mov("eax",$c); | ||
| 81 | |||
| 82 | &pop("ecx"); # clear variable from | ||
| 83 | |||
| 84 | &function_end($name); | ||
| 85 | } | ||
| 86 | |||
| 87 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/sqr.pl b/src/lib/libcrypto/bn/asm/x86/sqr.pl new file mode 100644 index 0000000000..1f90993cf6 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/sqr.pl | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assember | ||
| 3 | |||
| 4 | sub bn_sqr_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | |||
| 10 | &comment(""); | ||
| 11 | $r="esi"; | ||
| 12 | $a="edi"; | ||
| 13 | $num="ebx"; | ||
| 14 | |||
| 15 | &mov($r,&wparam(0)); # | ||
| 16 | &mov($a,&wparam(1)); # | ||
| 17 | &mov($num,&wparam(2)); # | ||
| 18 | |||
| 19 | &and($num,0xfffffff8); # num / 8 | ||
| 20 | &jz(&label("sw_finish")); | ||
| 21 | |||
| 22 | &set_label("sw_loop",0); | ||
| 23 | for ($i=0; $i<32; $i+=4) | ||
| 24 | { | ||
| 25 | &comment("Round $i"); | ||
| 26 | &mov("eax",&DWP($i,$a,"",0)); # *a | ||
| 27 | # XXX | ||
| 28 | &mul("eax"); # *a * *a | ||
| 29 | &mov(&DWP($i*2,$r,"",0),"eax"); # | ||
| 30 | &mov(&DWP($i*2+4,$r,"",0),"edx");# | ||
| 31 | } | ||
| 32 | |||
| 33 | &comment(""); | ||
| 34 | &add($a,32); | ||
| 35 | &add($r,64); | ||
| 36 | &sub($num,8); | ||
| 37 | &jnz(&label("sw_loop")); | ||
| 38 | |||
| 39 | &set_label("sw_finish",0); | ||
| 40 | &mov($num,&wparam(2)); # get num | ||
| 41 | &and($num,7); | ||
| 42 | &jz(&label("sw_end")); | ||
| 43 | |||
| 44 | for ($i=0; $i<7; $i++) | ||
| 45 | { | ||
| 46 | &comment("Tail Round $i"); | ||
| 47 | &mov("eax",&DWP($i*4,$a,"",0)); # *a | ||
| 48 | # XXX | ||
| 49 | &mul("eax"); # *a * *a | ||
| 50 | &mov(&DWP($i*8,$r,"",0),"eax"); # | ||
| 51 | &dec($num) if ($i != 7-1); | ||
| 52 | &mov(&DWP($i*8+4,$r,"",0),"edx"); | ||
| 53 | &jz(&label("sw_end")) if ($i != 7-1); | ||
| 54 | } | ||
| 55 | &set_label("sw_end",0); | ||
| 56 | |||
| 57 | &function_end($name); | ||
| 58 | } | ||
| 59 | |||
| 60 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/sub.pl b/src/lib/libcrypto/bn/asm/x86/sub.pl new file mode 100644 index 0000000000..837b0e1b07 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/sub.pl | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assember | ||
| 3 | |||
| 4 | sub bn_sub_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | |||
| 10 | &comment(""); | ||
| 11 | $a="esi"; | ||
| 12 | $b="edi"; | ||
| 13 | $c="eax"; | ||
| 14 | $r="ebx"; | ||
| 15 | $tmp1="ecx"; | ||
| 16 | $tmp2="edx"; | ||
| 17 | $num="ebp"; | ||
| 18 | |||
| 19 | &mov($r,&wparam(0)); # get r | ||
| 20 | &mov($a,&wparam(1)); # get a | ||
| 21 | &mov($b,&wparam(2)); # get b | ||
| 22 | &mov($num,&wparam(3)); # get num | ||
| 23 | &xor($c,$c); # clear carry | ||
| 24 | &and($num,0xfffffff8); # num / 8 | ||
| 25 | |||
| 26 | &jz(&label("aw_finish")); | ||
| 27 | |||
| 28 | &set_label("aw_loop",0); | ||
| 29 | for ($i=0; $i<8; $i++) | ||
| 30 | { | ||
| 31 | &comment("Round $i"); | ||
| 32 | |||
| 33 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
| 34 | &mov($tmp2,&DWP($i*4,$b,"",0)); # *b | ||
| 35 | &sub($tmp1,$c); | ||
| 36 | &mov($c,0); | ||
| 37 | &adc($c,$c); | ||
| 38 | &sub($tmp1,$tmp2); | ||
| 39 | &adc($c,0); | ||
| 40 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *r | ||
| 41 | } | ||
| 42 | |||
| 43 | &comment(""); | ||
| 44 | &add($a,32); | ||
| 45 | &add($b,32); | ||
| 46 | &add($r,32); | ||
| 47 | &sub($num,8); | ||
| 48 | &jnz(&label("aw_loop")); | ||
| 49 | |||
| 50 | &set_label("aw_finish",0); | ||
| 51 | &mov($num,&wparam(3)); # get num | ||
| 52 | &and($num,7); | ||
| 53 | &jz(&label("aw_end")); | ||
| 54 | |||
| 55 | for ($i=0; $i<7; $i++) | ||
| 56 | { | ||
| 57 | &comment("Tail Round $i"); | ||
| 58 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
| 59 | &mov($tmp2,&DWP($i*4,$b,"",0));# *b | ||
| 60 | &sub($tmp1,$c); | ||
| 61 | &mov($c,0); | ||
| 62 | &adc($c,$c); | ||
| 63 | &sub($tmp1,$tmp2); | ||
| 64 | &adc($c,0); | ||
| 65 | &dec($num) if ($i != 6); | ||
| 66 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *a | ||
| 67 | &jz(&label("aw_end")) if ($i != 6); | ||
| 68 | } | ||
| 69 | &set_label("aw_end",0); | ||
| 70 | |||
| 71 | # &mov("eax",$c); # $c is "eax" | ||
| 72 | |||
| 73 | &function_end($name); | ||
| 74 | } | ||
| 75 | |||
| 76 | 1; | ||
