diff options
Diffstat (limited to 'src/lib/libcrypto/bn/asm/alpha.works')
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/add.pl | 119 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/div.pl | 144 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/mul.pl | 116 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl | 120 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl | 213 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl | 98 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl | 177 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/sqr.pl | 113 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl | 109 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl | 132 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.works/sub.pl | 108 |
11 files changed, 1449 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/add.pl b/src/lib/libcrypto/bn/asm/alpha.works/add.pl new file mode 100644 index 0000000000..4dc76e6b69 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/add.pl | |||
| @@ -0,0 +1,119 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
# bn_add_words($name)
#
# Emits the Alpha routine $name.  The generated code reads its four
# arguments through wparam(0..3) -- result pointer, the two source
# pointers and the word count -- adds the source words pairwise with
# carry propagation, stores each sum through the result pointer and
# leaves the running carry in r0 ($cc).
#
# Layout of the generated code:
#   loop       four words per iteration, entered only while count >= 4
#   last_loop  one word per iteration for the remaining 0..3 words
#   finish     undoes the count bias and dispatches to last_loop or end
#
# Review notes: the original emitted an unconditional branch to "finish"
# ahead of the count test, which made the unrolled loop unreachable, and
# the unrolled loop stored all four sums to word 0 of the result.  Both
# are fixed here (stores now go to words 0..3).
sub bn_add_words
	{
	local($name)=@_;
	local($cc,$a,$b,$r);

	&init_pool(4);
	($cc)=GR("r0");

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);
	$count=&wparam(3);

	&function_begin($name,"");

	&comment("");
	&sub($count,4,$count);		# bias: count<0 means fewer than 4 words left
	&mov("zero",$cc);		# carry=0
	&blt($count,&label("finish"));

	# Word 0 of the first group; later groups reload it at the loop tail.
	($a0,$b0)=&NR(2);
	&ld($a0,&QWPw(0,$ap));
	&ld($b0,&QWPw(0,$bp));

##########################################################
	&set_label("loop");

	($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
	($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
	($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
	($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
	($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
	($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));

	# Each lane: o=a+b; t=(o<b); o+=cc; cc=(o<cc); cc+=t
	($o0,$t0)=&NR(2);
	&add($a0,$b0,$o0);
	&cmpult($o0,$b0,$t0);
	&add($o0,$cc,$o0);
	&cmpult($o0,$cc,$cc);
	&add($cc,$t0,$cc);	&FR($t0);

	($t1,$o1)=&NR(2);

	&add($a1,$b1,$o1);	&FR($a1);
	&cmpult($o1,$b1,$t1);	&FR($b1);
	&add($o1,$cc,$o1);
	&cmpult($o1,$cc,$cc);
	&add($cc,$t1,$cc);	&FR($t1);

	($t2,$o2)=&NR(2);

	&add($a2,$b2,$o2);	&FR($a2);
	&cmpult($o2,$b2,$t2);	&FR($b2);
	&add($o2,$cc,$o2);
	&cmpult($o2,$cc,$cc);
	&add($cc,$t2,$cc);	&FR($t2);

	($t3,$o3)=&NR(2);

	&add($a3,$b3,$o3);	&FR($a3);
	&cmpult($o3,$b3,$t3);	&FR($b3);
	&add($o3,$cc,$o3);
	&cmpult($o3,$cc,$cc);
	&add($cc,$t3,$cc);	&FR($t3);

	# All loads of this group are done before any store.
	&st($o0,&QWPw(0,$rp)); &FR($o0);
	&st($o1,&QWPw(1,$rp)); &FR($o1);
	&st($o2,&QWPw(2,$rp)); &FR($o2);
	&st($o3,&QWPw(3,$rp)); &FR($o3);

	&sub($count,4,$count);	# count-=4
	&add($ap,4*$QWS,$ap);	# ap+=4 words
	&add($bp,4*$QWS,$bp);	# bp+=4 words
	&add($rp,4*$QWS,$rp);	# rp+=4 words

	&blt($count,&label("finish"));
	&ld($a0,&QWPw(0,$ap));
	&ld($b0,&QWPw(0,$bp));
	&br(&label("loop"));
##################################################
	# Do the last 0..3 words

	($t0,$o0)=&NR(2);
	&set_label("last_loop");

	&ld($a0,&QWPw(0,$ap));	# get a
	&ld($b0,&QWPw(0,$bp));	# get b

	&add($a0,$b0,$o0);
	&cmpult($o0,$b0,$t0);	# carry out of a+b
	&add($o0,$cc,$o0);	# add the incoming carry
	&cmpult($o0,$cc,$cc);	# carry out of that addition
	&add($cc,$t0,$cc);	# combine the two carries
	&st($o0,&QWPw(0,$rp));	# save

	&add($ap,$QWS,$ap);
	&add($bp,$QWS,$bp);
	&add($rp,$QWS,$rp);
	&sub($count,1,$count);
	&bgt($count,&label("last_loop"));
	&function_end_A($name);

######################################################
	&set_label("finish");
	&add($count,4,$count);	# undo the bias: count is now the 0..3 leftover words
	&bgt($count,&label("last_loop"));

	&FR($o0,$t0,$a0,$b0);
	&set_label("end");
	&function_end($name);

	&fin_pool;
	}
| 118 | |||
| 119 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/div.pl b/src/lib/libcrypto/bn/asm/alpha.works/div.pl new file mode 100644 index 0000000000..7ec144377f --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/div.pl | |||
| @@ -0,0 +1,144 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | |||
# bn_div64
#
# Hands a fixed block of Alpha assembly for the routine bn_div64 to the
# generator via asm_add().  The text is kept in a single-quoted
# here-document, so the "$NN" register names and "$91xx" labels are taken
# literally and are not interpolated by Perl.
#
# What the embedded assembly visibly does:
#   - copies $16/$17/$18 into $9/$10/$11 after saving $26 and $9..$13 in a
#     48-byte frame;
#   - if the value taken from $18 is zero, loads -1 into $0 and jumps to
#     the epilogue;
#   - calls BN_num_bits_word on that value and calls abort when the
#     range check that follows fails;
#   - shifts the operands, then runs the loop at $9123 with a counter
#     that starts at 2, and ORs the two partial results into $0.
# NOTE(review): presumably this is a 128-by-64 bit division producing a
# 64-bit quotient in two 32-bit steps -- confirm against the C source
# this was compiled from.
| 3 | sub bn_div64 | ||
| 4 | { | ||
| 5 | local($data)=<<'EOF'; | ||
| 6 | # | ||
| 7 | # What follows was taken directly from the C compiler with a few | ||
| 8 | # hacks to redo the lables. | ||
| 9 | # | ||
| 10 | .text | ||
| 11 | .set noreorder | ||
| 12 | .set volatile | ||
| 13 | .align 3 | ||
| 14 | .globl bn_div64 | ||
| 15 | .ent bn_div64 | ||
| 16 | bn_div64: | ||
| 17 | ldgp $29,0($27) | ||
| 18 | bn_div64..ng: | ||
| 19 | lda $30,-48($30) | ||
| 20 | .frame $30,48,$26,0 | ||
| 21 | stq $26,0($30) | ||
| 22 | stq $9,8($30) | ||
| 23 | stq $10,16($30) | ||
| 24 | stq $11,24($30) | ||
| 25 | stq $12,32($30) | ||
| 26 | stq $13,40($30) | ||
| 27 | .mask 0x4003e00,-48 | ||
| 28 | .prologue 1 | ||
| 29 | bis $16,$16,$9 | ||
| 30 | bis $17,$17,$10 | ||
| 31 | bis $18,$18,$11 | ||
| 32 | bis $31,$31,$13 | ||
| 33 | bis $31,2,$12 | ||
| 34 | bne $11,$9119 | ||
| 35 | lda $0,-1 | ||
| 36 | br $31,$9136 | ||
| 37 | .align 4 | ||
| 38 | $9119: | ||
| 39 | bis $11,$11,$16 | ||
| 40 | jsr $26,BN_num_bits_word | ||
| 41 | ldgp $29,0($26) | ||
| 42 | subq $0,64,$1 | ||
| 43 | beq $1,$9120 | ||
| 44 | bis $31,1,$1 | ||
| 45 | sll $1,$0,$1 | ||
| 46 | cmpule $9,$1,$1 | ||
| 47 | bne $1,$9120 | ||
| 48 | # lda $16,_IO_stderr_ | ||
| 49 | # lda $17,$C32 | ||
| 50 | # bis $0,$0,$18 | ||
| 51 | # jsr $26,fprintf | ||
| 52 | # ldgp $29,0($26) | ||
| 53 | jsr $26,abort | ||
| 54 | ldgp $29,0($26) | ||
| 55 | .align 4 | ||
| 56 | $9120: | ||
| 57 | bis $31,64,$3 | ||
| 58 | cmpult $9,$11,$2 | ||
| 59 | subq $3,$0,$1 | ||
| 60 | addl $1,$31,$0 | ||
| 61 | subq $9,$11,$1 | ||
| 62 | cmoveq $2,$1,$9 | ||
| 63 | beq $0,$9122 | ||
| 64 | zapnot $0,15,$2 | ||
| 65 | subq $3,$0,$1 | ||
| 66 | sll $11,$2,$11 | ||
| 67 | sll $9,$2,$3 | ||
| 68 | srl $10,$1,$1 | ||
| 69 | sll $10,$2,$10 | ||
| 70 | bis $3,$1,$9 | ||
| 71 | $9122: | ||
| 72 | srl $11,32,$5 | ||
| 73 | zapnot $11,15,$6 | ||
| 74 | lda $7,-1 | ||
| 75 | .align 5 | ||
| 76 | $9123: | ||
| 77 | srl $9,32,$1 | ||
| 78 | subq $1,$5,$1 | ||
| 79 | bne $1,$9126 | ||
| 80 | zapnot $7,15,$27 | ||
| 81 | br $31,$9127 | ||
| 82 | .align 4 | ||
| 83 | $9126: | ||
| 84 | bis $9,$9,$24 | ||
| 85 | bis $5,$5,$25 | ||
| 86 | divqu $24,$25,$27 | ||
| 87 | $9127: | ||
| 88 | srl $10,32,$4 | ||
| 89 | .align 5 | ||
| 90 | $9128: | ||
| 91 | mulq $27,$5,$1 | ||
| 92 | subq $9,$1,$3 | ||
| 93 | zapnot $3,240,$1 | ||
| 94 | bne $1,$9129 | ||
| 95 | mulq $6,$27,$2 | ||
| 96 | sll $3,32,$1 | ||
| 97 | addq $1,$4,$1 | ||
| 98 | cmpule $2,$1,$2 | ||
| 99 | bne $2,$9129 | ||
| 100 | subq $27,1,$27 | ||
| 101 | br $31,$9128 | ||
| 102 | .align 4 | ||
| 103 | $9129: | ||
| 104 | mulq $27,$6,$1 | ||
| 105 | mulq $27,$5,$4 | ||
| 106 | srl $1,32,$3 | ||
| 107 | sll $1,32,$1 | ||
| 108 | addq $4,$3,$4 | ||
| 109 | cmpult $10,$1,$2 | ||
| 110 | subq $10,$1,$10 | ||
| 111 | addq $2,$4,$2 | ||
| 112 | cmpult $9,$2,$1 | ||
| 113 | bis $2,$2,$4 | ||
| 114 | beq $1,$9134 | ||
| 115 | addq $9,$11,$9 | ||
| 116 | subq $27,1,$27 | ||
| 117 | $9134: | ||
| 118 | subl $12,1,$12 | ||
| 119 | subq $9,$4,$9 | ||
| 120 | beq $12,$9124 | ||
| 121 | sll $27,32,$13 | ||
| 122 | sll $9,32,$2 | ||
| 123 | srl $10,32,$1 | ||
| 124 | sll $10,32,$10 | ||
| 125 | bis $2,$1,$9 | ||
| 126 | br $31,$9123 | ||
| 127 | .align 4 | ||
| 128 | $9124: | ||
| 129 | bis $13,$27,$0 | ||
| 130 | $9136: | ||
| 131 | ldq $26,0($30) | ||
| 132 | ldq $9,8($30) | ||
| 133 | ldq $10,16($30) | ||
| 134 | ldq $11,24($30) | ||
| 135 | ldq $12,32($30) | ||
| 136 | ldq $13,40($30) | ||
| 137 | addq $30,48,$30 | ||
| 138 | ret $31,($26),1 | ||
| 139 | .end bn_div64 | ||
| 140 | EOF | ||
# Pass the assembly text above, unchanged, to the generator.
| 141 | &asm_add($data); | ||
| 142 | } | ||
| 143 | |||
| 144 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl new file mode 100644 index 0000000000..b182bae452 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
# bn_mul_words($name)
#
# Emits the Alpha routine $name.  The generated code reads, through
# wparam(0..3), a result pointer, a source pointer, a word count and a
# multiplier word.  For every source word it stores the low half of
# word*multiplier plus the running carry, and carries the high half (plus
# any overflow of that addition) into the next word.  The final carry is
# left in r0 ($cc).
#
# Only a one-word-per-iteration loop is generated.  The original also
# carried scaffolding for a four-way unrolled loop that was never
# written: an unreachable count test and two loads behind an
# unconditional branch, two pool registers taken for them and never
# released, and a copy of the bn_add_words loop parked in a string.
# That scaffolding is removed, together with the -4/+4 count bias that
# only existed to serve it.
sub bn_mul_words
	{
	local($name)=@_;
	local($cc,$a,$b,$r,$count);

	&init_pool(4);
	($cc)=GR("r0");

	$rp=&wparam(0);
	$ap=&wparam(1);
	$count=&wparam(2);
	$word=&wparam(3);

	&function_begin($name,"");

	&comment("");
	&mov("zero",$cc);		# carry=0
	&br(&label("finish"));		# test count before the first iteration

##################################################
	# One word per iteration

	&set_label("last_loop");

	&ld(($a0)=&NR(1),&QWPw(0,$ap));	# get a
	&mul($a0,$word,($l0)=&NR(1));	# low half of a*word
	&add($ap,$QWS,$ap);
	&muh($a0,$word,($h0)=&NR(1));	&FR($a0);	# high half of a*word
	&add($l0,$cc,$l0);		# low+=carry
	&add($rp,$QWS,$rp);
	&sub($count,1,$count);
	&cmpult($l0,$cc,$cc);		# did low+carry overflow?
	&st($l0,&QWPw(-1,$rp));	&FR($l0);	# rp was already advanced
	&add($h0,$cc,$cc);	&FR($h0);	# next carry = high + overflow

	&bgt($count,&label("last_loop"));
	&function_end_A($name);

######################################################
	&set_label("finish");
	&bgt($count,&label("last_loop"));

	&set_label("end");
	&function_end($name);

	&fin_pool;
	}
| 115 | |||
| 116 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl new file mode 100644 index 0000000000..e37f6315fb --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
# bn_mul_add_words($name)
#
# Emits the Alpha routine $name.  The generated code reads, through
# wparam(0..3), a result pointer, a source pointer, a word count and a
# multiplier word.  For every word it computes
#     r[i] = r[i] + low(a[i]*word) + carry
# and carries high(a[i]*word) plus the overflow of both additions into
# the next word.  The final carry is left in r0 ($cc).
#
# Only a one-word-per-iteration loop is generated.  The original also
# carried scaffolding for a four-way unrolled loop that was never
# written: an unreachable count test and two loads behind an
# unconditional branch, two pool registers taken for them and never
# released, and a copy of the bn_add_words loop parked in a string.
# That scaffolding is removed, together with the -4/+4 count bias that
# only existed to serve it.
sub bn_mul_add_words
	{
	local($name)=@_;
	local($cc,$a,$b,$r,$count);

	&init_pool(4);
	($cc)=GR("r0");

	$rp=&wparam(0);
	$ap=&wparam(1);
	$count=&wparam(2);
	$word=&wparam(3);

	&function_begin($name,"");

	&comment("");
	&mov("zero",$cc);		# carry=0
	&br(&label("finish"));		# test count before the first iteration

##################################################
	# One word per iteration

	&set_label("last_loop");

	&ld(($a0)=&NR(1),&QWPw(0,$ap));	# get a
	&ld(($r0)=&NR(1),&QWPw(0,$rp));	# get r
	&mul($a0,$word,($l0)=&NR(1));	# low half of a*word
	&sub($count,1,$count);
	&add($ap,$QWS,$ap);
	&muh($a0,$word,($h0)=&NR(1));	&FR($a0);	# high half of a*word
	&add($r0,$l0,$r0);		# r+=low
	&add($rp,$QWS,$rp);
	&cmpult($r0,$l0,($t0)=&NR(1));	&FR($l0);	# overflow of r+low
	&add($r0,$cc,$r0);		# r+=carry
	&add($h0,$t0,$h0);	&FR($t0);	# fold first overflow into high
	&cmpult($r0,$cc,$cc);		# overflow of r+carry
	&st($r0,&QWPw(-1,$rp));	&FR($r0);	# rp was already advanced
	&add($h0,$cc,$cc);	&FR($h0);	# next carry = high + overflows

	&bgt($count,&label("last_loop"));
	&function_end_A($name);

######################################################
	&set_label("finish");
	&bgt($count,&label("last_loop"));

	&set_label("end");
	&function_end($name);

	&fin_pool;
	}
| 119 | |||
| 120 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl new file mode 100644 index 0000000000..5efd201281 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl | |||
| @@ -0,0 +1,213 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
# mul_add_c(x, y, acc0, acc1, acc2)
#
# Emits code that adds the double-word product x*y into the three-word
# accumulator acc0 (lowest), acc1, acc2.  All five arguments are register
# names; scratch registers are taken from the pool with NR and handed
# back with FR as soon as their value is dead.
sub mul_add_c
	{
	local($x,$y,$acc0,$acc1,$acc2)=@_;
	local($lo,$hi,$carry);

	# Both halves of the product.
	($lo)=&NR(1);
	&mul($x,$y,$lo);
	($hi)=&NR(1);
	&muh($x,$y,$hi);

	# acc0 += lo; the carry out of that addition is folded into hi.
	&add($acc0,$lo,$acc0);
	($carry)=&NR(1);
	&cmpult($acc0,$lo,$carry);
	&FR($lo);
	&add($carry,$hi,$hi);
	&FR($carry);

	# acc1 += hi; the carry out of that addition goes into acc2.
	&add($acc1,$hi,$acc1);
	($carry)=&NR(1);
	&cmpult($acc1,$hi,$carry);
	&FR($hi);
	&add($acc2,$carry,$acc2);
	&FR($carry);
	}
| 18 | |||
# bn_mul_comba4($name)
#
# Emits the Alpha routine $name: a fully unrolled 4x4-word multiply.
# wparam(0) is the result pointer (8 words are stored), wparam(1) and
# wparam(2) are the two 4-word operands.
#
# Scheme: every partial product a[i]*b[j] is split into a low word
# (mul) and a high word (muh).  The low word is a term of result column
# i+j, the high word a term of column i+j+1.  Within a column the terms
# are summed into $R; each addition is followed by a cmpult that
# captures the carry, and the carries are counted in $H1/$H2.  After the
# column is stored, $R is restarted as $H1+$H2 for the next column.  The
# multiplies for upcoming columns are interleaved with the additions of
# the current one.
#
# Review notes (fixed relative to the original):
#   - the initial clear wrote to an undefined $H0, leaving $H2
#     uninitialised; it now clears $H2;
#   - column 2 added the already released $r01 instead of $r06, so the
#     low word of a[0]*b[2] never reached the result;
#   - every "&add(...) &FR(...)" pair lacked its ';' and was parsed as a
#     bitwise AND of the two calls;
#   - the operand registers a[]/b[] are now released at the end.
sub bn_mul_comba4
	{
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);

	$cnt=1;
	&init_pool(3);

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);

	&function_begin($name,"");

	&comment("");

	&ld(($a[0])=&NR(1),&QWPw(0,$ap));
	&ld(($b[0])=&NR(1),&QWPw(0,$bp));
	&ld(($a[1])=&NR(1),&QWPw(1,$ap));
	&ld(($b[1])=&NR(1),&QWPw(1,$bp));
	&mul($a[0],$b[0],($r00)=&NR(1));
	&ld(($a[2])=&NR(1),&QWPw(2,$ap));
	&ld(($b[2])=&NR(1),&QWPw(2,$bp));
	&muh($a[0],$b[0],($r01)=&NR(1));
	# $ap/$bp are released just before their last use; presumably so the
	# pool can hand the same register back for a[3]/b[3].
	&FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
	&FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
	&mul($a[0],$b[1],($r02)=&NR(1));

	($R,$H1,$H2)=&NR(3);

	# column 0
	&st($r00,&QWPw(0,$rp));	&FR($r00);

	# column 1: r01 r02 r03
	&mov("zero",$R);
	&mul($a[1],$b[0],($r03)=&NR(1));

	&mov("zero",$H1);
	&mov("zero",$H2);
	&add($R,$r01,$R);
	&muh($a[0],$b[1],($r04)=&NR(1));
	&cmpult($R,$r01,($t01)=&NR(1));	&FR($r01);
	&add($R,$r02,$R);
	&add($H1,$t01,$H1);		&FR($t01);
	&muh($a[1],$b[0],($r05)=&NR(1));
	&cmpult($R,$r02,($t02)=&NR(1));	&FR($r02);
	&add($R,$r03,$R);
	&add($H2,$t02,$H2);		&FR($t02);
	&mul($a[0],$b[2],($r06)=&NR(1));
	&cmpult($R,$r03,($t03)=&NR(1));	&FR($r03);
	&add($H1,$t03,$H1);		&FR($t03);
	&st($R,&QWPw(1,$rp));
	&add($H1,$H2,$R);

	# column 2: r04 r05 r06 r07 r08
	&mov("zero",$H1);
	&add($R,$r04,$R);
	&mov("zero",$H2);
	&mul($a[1],$b[1],($r07)=&NR(1));
	&cmpult($R,$r04,($t04)=&NR(1));	&FR($r04);
	&add($R,$r05,$R);
	&add($H1,$t04,$H1);		&FR($t04);
	&mul($a[2],$b[0],($r08)=&NR(1));
	&cmpult($R,$r05,($t05)=&NR(1));	&FR($r05);
	&add($R,$r06,$R);
	&add($H2,$t05,$H2);		&FR($t05);
	&muh($a[0],$b[2],($r09)=&NR(1));
	&cmpult($R,$r06,($t06)=&NR(1));	&FR($r06);
	&add($R,$r07,$R);
	&add($H1,$t06,$H1);		&FR($t06);
	&muh($a[1],$b[1],($r10)=&NR(1));
	&cmpult($R,$r07,($t07)=&NR(1));	&FR($r07);
	&add($R,$r08,$R);
	&add($H2,$t07,$H2);		&FR($t07);
	&muh($a[2],$b[0],($r11)=&NR(1));
	&cmpult($R,$r08,($t08)=&NR(1));	&FR($r08);
	&add($H1,$t08,$H1);		&FR($t08);
	&st($R,&QWPw(2,$rp));
	&add($H1,$H2,$R);

	# column 3: r09 r10 r11 r12 r13 r14 r15
	&mov("zero",$H1);
	&add($R,$r09,$R);
	&mov("zero",$H2);
	&mul($a[0],$b[3],($r12)=&NR(1));
	&cmpult($R,$r09,($t09)=&NR(1));	&FR($r09);
	&add($R,$r10,$R);
	&add($H1,$t09,$H1);		&FR($t09);
	&mul($a[1],$b[2],($r13)=&NR(1));
	&cmpult($R,$r10,($t10)=&NR(1));	&FR($r10);
	&add($R,$r11,$R);
	&add($H1,$t10,$H1);		&FR($t10);
	&mul($a[2],$b[1],($r14)=&NR(1));
	&cmpult($R,$r11,($t11)=&NR(1));	&FR($r11);
	&add($R,$r12,$R);
	&add($H1,$t11,$H1);		&FR($t11);
	&mul($a[3],$b[0],($r15)=&NR(1));
	&cmpult($R,$r12,($t12)=&NR(1));	&FR($r12);
	&add($R,$r13,$R);
	&add($H1,$t12,$H1);		&FR($t12);
	&muh($a[0],$b[3],($r16)=&NR(1));
	&cmpult($R,$r13,($t13)=&NR(1));	&FR($r13);
	&add($R,$r14,$R);
	&add($H1,$t13,$H1);		&FR($t13);
	&muh($a[1],$b[2],($r17)=&NR(1));
	&cmpult($R,$r14,($t14)=&NR(1));	&FR($r14);
	&add($R,$r15,$R);
	&add($H1,$t14,$H1);		&FR($t14);
	&muh($a[2],$b[1],($r18)=&NR(1));
	&cmpult($R,$r15,($t15)=&NR(1));	&FR($r15);
	&add($H1,$t15,$H1);		&FR($t15);
	&st($R,&QWPw(3,$rp));
	&add($H1,$H2,$R);

	# column 4: r16 r17 r18 r19 r20 r21 r22
	&mov("zero",$H1);
	&add($R,$r16,$R);
	&mov("zero",$H2);
	&muh($a[3],$b[0],($r19)=&NR(1));
	&cmpult($R,$r16,($t16)=&NR(1));	&FR($r16);
	&add($R,$r17,$R);
	&add($H1,$t16,$H1);		&FR($t16);
	&mul($a[1],$b[3],($r20)=&NR(1));
	&cmpult($R,$r17,($t17)=&NR(1));	&FR($r17);
	&add($R,$r18,$R);
	&add($H1,$t17,$H1);		&FR($t17);
	&mul($a[2],$b[2],($r21)=&NR(1));
	&cmpult($R,$r18,($t18)=&NR(1));	&FR($r18);
	&add($R,$r19,$R);
	&add($H1,$t18,$H1);		&FR($t18);
	&mul($a[3],$b[1],($r22)=&NR(1));
	&cmpult($R,$r19,($t19)=&NR(1));	&FR($r19);
	&add($R,$r20,$R);
	&add($H1,$t19,$H1);		&FR($t19);
	&muh($a[1],$b[3],($r23)=&NR(1));
	&cmpult($R,$r20,($t20)=&NR(1));	&FR($r20);
	&add($R,$r21,$R);
	&add($H1,$t20,$H1);		&FR($t20);
	&muh($a[2],$b[2],($r24)=&NR(1));
	&cmpult($R,$r21,($t21)=&NR(1));	&FR($r21);
	&add($R,$r22,$R);
	&add($H1,$t21,$H1);		&FR($t21);
	&muh($a[3],$b[1],($r25)=&NR(1));
	&cmpult($R,$r22,($t22)=&NR(1));	&FR($r22);
	&add($H1,$t22,$H1);		&FR($t22);
	&st($R,&QWPw(4,$rp));
	&add($H1,$H2,$R);

	# column 5: r23 r24 r25 r26 r27
	&mov("zero",$H1);
	&add($R,$r23,$R);
	&mov("zero",$H2);
	&mul($a[2],$b[3],($r26)=&NR(1));
	&cmpult($R,$r23,($t23)=&NR(1));	&FR($r23);
	&add($R,$r24,$R);
	&add($H1,$t23,$H1);		&FR($t23);
	&mul($a[3],$b[2],($r27)=&NR(1));
	&cmpult($R,$r24,($t24)=&NR(1));	&FR($r24);
	&add($R,$r25,$R);
	&add($H1,$t24,$H1);		&FR($t24);
	&muh($a[2],$b[3],($r28)=&NR(1));
	&cmpult($R,$r25,($t25)=&NR(1));	&FR($r25);
	&add($R,$r26,$R);
	&add($H1,$t25,$H1);		&FR($t25);
	&muh($a[3],$b[2],($r29)=&NR(1));
	&cmpult($R,$r26,($t26)=&NR(1));	&FR($r26);
	&add($R,$r27,$R);
	&add($H1,$t26,$H1);		&FR($t26);
	&mul($a[3],$b[3],($r30)=&NR(1));
	&cmpult($R,$r27,($t27)=&NR(1));	&FR($r27);
	&add($H1,$t27,$H1);		&FR($t27);
	&st($R,&QWPw(5,$rp));
	&add($H1,$H2,$R);

	# column 6: r28 r29 r30
	&mov("zero",$H1);
	&add($R,$r28,$R);
	&mov("zero",$H2);
	&muh($a[3],$b[3],($r31)=&NR(1));
	&cmpult($R,$r28,($t28)=&NR(1));	&FR($r28);
	&add($R,$r29,$R);
	&add($H1,$t28,$H1);		&FR($t28);
	############
	&cmpult($R,$r29,($t29)=&NR(1));	&FR($r29);
	&add($R,$r30,$R);
	&add($H1,$t29,$H1);		&FR($t29);
	############
	&cmpult($R,$r30,($t30)=&NR(1));	&FR($r30);
	&add($H1,$t30,$H1);		&FR($t30);
	&st($R,&QWPw(6,$rp));
	&add($H1,$H2,$R);

	# column 7: carry from column 6 plus the high word of a[3]*b[3]
	&add($R,$r31,$R);	&FR($r31);
	&st($R,&QWPw(7,$rp));

	&FR($R,$H1,$H2);
	&FR(@a,@b);
	&function_end($name);

	&fin_pool;
	}
| 212 | |||
| 213 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl new file mode 100644 index 0000000000..79d86dd25c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
# mul_add_c(x, y, acc0, acc1, acc2)
#
# Emits code that adds the double-word product x*y into the three-word
# accumulator acc0 (lowest), acc1, acc2.  All five arguments are register
# names; scratch registers are taken from the pool with NR and handed
# back with FR as soon as their value is dead.  Each call also writes
# the current value of the global $cnt to STDERR and increments it.
sub mul_add_c
	{
	local($x,$y,$acc0,$acc1,$acc2)=@_;
	local($lo,$hi,$carry);

	# Progress trace.
	print STDERR "count=$cnt\n";
	$cnt++;

	# Both halves of the product.
	($lo)=&NR(1);
	&mul($x,$y,$lo);
	($hi)=&NR(1);
	&muh($x,$y,$hi);

	# acc0 += lo; the carry out of that addition is folded into hi.
	&add($acc0,$lo,$acc0);
	($carry)=&NR(1);
	&cmpult($acc0,$lo,$carry);
	&FR($lo);
	&add($carry,$hi,$hi);
	&FR($carry);

	# acc1 += hi; the carry out of that addition goes into acc2.
	&add($acc1,$hi,$acc1);
	($carry)=&NR(1);
	&cmpult($acc1,$hi,$carry);
	&FR($hi);
	&add($acc2,$carry,$acc2);
	&FR($carry);
	}
| 19 | |||
# bn_mul_comba4($name)
#
# Emits the Alpha routine $name: a 4x4-word multiply built from
# mul_add_c calls.  wparam(0) is the result pointer (8 words are
# stored), wparam(1) and wparam(2) are the two 4-word operands.
#
# The result is produced one column at a time in a three-register
# accumulator ($c0,$c1,$c2).  After the products of a column have been
# added, $c0 is stored, its register is released and a new one taken,
# the three names are rotated so that the old $c1/$c2 become the new
# $c0/$c1, and the new top register is cleared.
#
# Review note: the original requested the replacement register with
# &NR($c0), i.e. it passed a register name where every other call site
# passes a count.  It now uses &NR(1), as the identical step in
# bn_mul_comba8 does.
sub bn_mul_comba4
	{
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);

	$cnt=1;
	&init_pool(3);

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);

	&function_begin($name,"");

	&comment("");

	&ld(($a[0])=&NR(1),&QWPw(0,$ap));
	&ld(($b[0])=&NR(1),&QWPw(0,$bp));
	&ld(($a[1])=&NR(1),&QWPw(1,$ap));
	&ld(($b[1])=&NR(1),&QWPw(1,$bp));
	&ld(($a[2])=&NR(1),&QWPw(2,$ap));
	&ld(($b[2])=&NR(1),&QWPw(2,$bp));
	&ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
	&ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);

	# column 0
	($c0,$c1,$c2)=&NR(3);
	&mov("zero",$c2);
	&mul($a[0],$b[0],$c0);
	&muh($a[0],$b[0],$c1);
	&st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 1
	&mul_add_c($a[0],$b[1],$c0,$c1,$c2);
	&mul_add_c($a[1],$b[0],$c0,$c1,$c2);
	&st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 2
	&mul_add_c($a[1],$b[1],$c0,$c1,$c2);
	&mul_add_c($a[0],$b[2],$c0,$c1,$c2);
	&mul_add_c($a[2],$b[0],$c0,$c1,$c2);
	&st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 3; operand registers are released after their last use
	&mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
	&mul_add_c($a[1],$b[2],$c0,$c1,$c2);
	&mul_add_c($a[2],$b[1],$c0,$c1,$c2);
	&mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
	&st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 4
	&mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
	&mul_add_c($a[2],$b[2],$c0,$c1,$c2);
	&mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
	&st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 5
	&mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
	&mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
	&st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# columns 6 and 7: low and high accumulator words of the last product
	&mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
	&st($c0,&QWPw(6,$rp));
	&st($c1,&QWPw(7,$rp));

	&FR($c0,$c1,$c2);

	&function_end($name);

	&fin_pool;
	}
| 97 | |||
| 98 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl new file mode 100644 index 0000000000..525ca7494b --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl | |||
| @@ -0,0 +1,177 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_mul_comba8 | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 8 | |||
| 9 | $cnt=1; | ||
| 10 | &init_pool(3); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | |||
| 16 | &function_begin($name,""); | ||
| 17 | |||
| 18 | &comment(""); | ||
| 19 | |||
| 20 | &stack_push(2); | ||
| 21 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 22 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 23 | &st($reg_s0,&swtmp(0)); &FR($reg_s0); | ||
| 24 | &st($reg_s1,&swtmp(1)); &FR($reg_s1); | ||
| 25 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 26 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 27 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 28 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 29 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 30 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
| 31 | &ld(($a[4])=&NR(1),&QWPw(1,$ap)); | ||
| 32 | &ld(($b[4])=&NR(1),&QWPw(1,$bp)); | ||
| 33 | &ld(($a[5])=&NR(1),&QWPw(1,$ap)); | ||
| 34 | &ld(($b[5])=&NR(1),&QWPw(1,$bp)); | ||
| 35 | &ld(($a[6])=&NR(1),&QWPw(1,$ap)); | ||
| 36 | &ld(($b[6])=&NR(1),&QWPw(1,$bp)); | ||
| 37 | &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap); | ||
| 38 | &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp); | ||
| 39 | |||
| 40 | ($c0,$c1,$c2)=&NR(3); | ||
| 41 | &mov("zero",$c2); | ||
| 42 | &mul($a[0],$b[0],$c0); | ||
| 43 | &muh($a[0],$b[0],$c1); | ||
| 44 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | |||
| 48 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
| 49 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
| 50 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 52 | &mov("zero",$c2); | ||
| 53 | |||
| 54 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
| 55 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
| 56 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
| 57 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 59 | &mov("zero",$c2); | ||
| 60 | |||
| 61 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); | ||
| 62 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
| 63 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
| 64 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); | ||
| 65 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 66 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 67 | &mov("zero",$c2); | ||
| 68 | |||
| 69 | &mul_add_c($a[0],$b[4],$c0,$c1,$c2); | ||
| 70 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); | ||
| 71 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
| 72 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); | ||
| 73 | &mul_add_c($a[4],$b[0],$c0,$c1,$c2); | ||
| 74 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 75 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 76 | &mov("zero",$c2); | ||
| 77 | |||
| 78 | &mul_add_c($a[0],$b[5],$c0,$c1,$c2); | ||
| 79 | &mul_add_c($a[1],$b[4],$c0,$c1,$c2); | ||
| 80 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); | ||
| 81 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); | ||
| 82 | &mul_add_c($a[4],$b[1],$c0,$c1,$c2); | ||
| 83 | &mul_add_c($a[5],$b[0],$c0,$c1,$c2); | ||
| 84 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 85 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 86 | &mov("zero",$c2); | ||
| 87 | |||
| 88 | &mul_add_c($a[0],$b[6],$c0,$c1,$c2); | ||
| 89 | &mul_add_c($a[1],$b[5],$c0,$c1,$c2); | ||
| 90 | &mul_add_c($a[2],$b[4],$c0,$c1,$c2); | ||
| 91 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); | ||
| 92 | &mul_add_c($a[4],$b[2],$c0,$c1,$c2); | ||
| 93 | &mul_add_c($a[5],$b[1],$c0,$c1,$c2); | ||
| 94 | &mul_add_c($a[6],$b[0],$c0,$c1,$c2); | ||
| 95 | &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 97 | &mov("zero",$c2); | ||
| 98 | |||
| 99 | &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); | ||
| 100 | &mul_add_c($a[1],$b[6],$c0,$c1,$c2); | ||
| 101 | &mul_add_c($a[2],$b[5],$c0,$c1,$c2); | ||
| 102 | &mul_add_c($a[3],$b[4],$c0,$c1,$c2); | ||
| 103 | &mul_add_c($a[4],$b[3],$c0,$c1,$c2); | ||
| 104 | &mul_add_c($a[5],$b[2],$c0,$c1,$c2); | ||
| 105 | &mul_add_c($a[6],$b[1],$c0,$c1,$c2); | ||
| 106 | &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
| 107 | &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 108 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 109 | &mov("zero",$c2); | ||
| 110 | |||
| 111 | &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); | ||
| 112 | &mul_add_c($a[2],$b[6],$c0,$c1,$c2); | ||
| 113 | &mul_add_c($a[3],$b[5],$c0,$c1,$c2); | ||
| 114 | &mul_add_c($a[4],$b[4],$c0,$c1,$c2); | ||
| 115 | &mul_add_c($a[5],$b[3],$c0,$c1,$c2); | ||
| 116 | &mul_add_c($a[6],$b[2],$c0,$c1,$c2); | ||
| 117 | &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
| 118 | &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 119 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 120 | &mov("zero",$c2); | ||
| 121 | |||
| 122 | &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); | ||
| 123 | &mul_add_c($a[3],$b[6],$c0,$c1,$c2); | ||
| 124 | &mul_add_c($a[4],$b[5],$c0,$c1,$c2); | ||
| 125 | &mul_add_c($a[5],$b[4],$c0,$c1,$c2); | ||
| 126 | &mul_add_c($a[6],$b[3],$c0,$c1,$c2); | ||
| 127 | &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
| 128 | &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 129 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 130 | &mov("zero",$c2); | ||
| 131 | |||
| 132 | &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); | ||
| 133 | &mul_add_c($a[4],$b[6],$c0,$c1,$c2); | ||
| 134 | &mul_add_c($a[5],$b[5],$c0,$c1,$c2); | ||
| 135 | &mul_add_c($a[6],$b[4],$c0,$c1,$c2); | ||
| 136 | &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); | ||
| 137 | &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 138 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 139 | &mov("zero",$c2); | ||
| 140 | |||
| 141 | &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); | ||
| 142 | &mul_add_c($a[5],$b[6],$c0,$c1,$c2); | ||
| 143 | &mul_add_c($a[6],$b[5],$c0,$c1,$c2); | ||
| 144 | &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); | ||
| 145 | &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 146 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 147 | &mov("zero",$c2); | ||
| 148 | |||
| 149 | &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); | ||
| 150 | &mul_add_c($a[6],$b[6],$c0,$c1,$c2); | ||
| 151 | &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); | ||
| 152 | &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 153 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 154 | &mov("zero",$c2); | ||
| 155 | |||
| 156 | &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); | ||
| 157 | &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); | ||
| 158 | &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 159 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 160 | &mov("zero",$c2); | ||
| 161 | |||
| 162 | &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); | ||
| 163 | &st($c0,&QWPw(14,$rp)); | ||
| 164 | &st($c1,&QWPw(15,$rp)); | ||
| 165 | |||
| 166 | &FR($c0,$c1,$c2); | ||
| 167 | |||
| 168 | &ld($reg_s0,&swtmp(0)); | ||
| 169 | &ld($reg_s1,&swtmp(1)); | ||
| 170 | &stack_pop(2); | ||
| 171 | |||
| 172 | &function_end($name); | ||
| 173 | |||
| 174 | &fin_pool; | ||
| 175 | } | ||
| 176 | |||
| 177 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl new file mode 100644 index 0000000000..a55b696906 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl | |||
| @@ -0,0 +1,113 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_sqr_words # generator for the routine $name(rp, ap, count): emits a one-word-at-a-time squaring loop | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); # NOTE(review): "$couny" looks like a typo for $count; $count is not localized here | ||
| 8 | |||
| 9 | &init_pool(3); | ||
| 10 | ($cc)=GR("r0"); # $cc is r0; it is zeroed below and not written again by the code emitted here | ||
| 11 | |||
| 12 | $rp=&wparam(0); # result pointer (stored to below, two words per input word) | ||
| 13 | $ap=&wparam(1); # input pointer (loaded from below) | ||
| 14 | $count=&wparam(2); # number of input words still to process | ||
| 15 | |||
| 16 | &function_begin($name,""); | ||
| 17 | |||
| 18 | &comment(""); | ||
| 19 | &sub($count,4,$count); | ||
| 20 | &mov("zero",$cc); | ||
| 21 | &br(&label("finish")); # unconditional: always go to "finish", which adds the 4 back | ||
| 22 | &blt($count,&label("finish")); # emitted directly after the unconditional br above | ||
| 23 | |||
| 24 | ($a0,$r0)=&NR(2); | ||
| 25 | &ld($a0,&QWPw(0,$ap)); | ||
| 26 | &ld($r0,&QWPw(0,$rp)); # $r0 is not referenced again in this sub | ||
| 27 | # The unrolled loop below is only stored in $a as a single-quoted heredoc string; it is not run here. | ||
| 28 | $a=<<'EOF'; | ||
| 29 | ########################################################## | ||
| 30 | &set_label("loop"); | ||
| 31 | |||
| 32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 38 | |||
| 39 | ($o0,$t0)=&NR(2); | ||
| 40 | &add($a0,$b0,$o0); | ||
| 41 | &cmpult($o0,$b0,$t0); | ||
| 42 | &add($o0,$cc,$o0); | ||
| 43 | &cmpult($o0,$cc,$cc); | ||
| 44 | &add($cc,$t0,$cc); &FR($t0); | ||
| 45 | |||
| 46 | ($t1,$o1)=&NR(2); | ||
| 47 | |||
| 48 | &add($a1,$b1,$o1); &FR($a1); | ||
| 49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 50 | &add($o1,$cc,$o1); | ||
| 51 | &cmpult($o1,$cc,$cc); | ||
| 52 | &add($cc,$t1,$cc); &FR($t1); | ||
| 53 | |||
| 54 | ($t2,$o2)=&NR(2); | ||
| 55 | |||
| 56 | &add($a2,$b2,$o2); &FR($a2); | ||
| 57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 58 | &add($o2,$cc,$o2); | ||
| 59 | &cmpult($o2,$cc,$cc); | ||
| 60 | &add($cc,$t2,$cc); &FR($t2); | ||
| 61 | |||
| 62 | ($t3,$o3)=&NR(2); | ||
| 63 | |||
| 64 | &add($a3,$b3,$o3); &FR($a3); | ||
| 65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 66 | &add($o3,$cc,$o3); | ||
| 67 | &cmpult($o3,$cc,$cc); | ||
| 68 | &add($cc,$t3,$cc); &FR($t3); | ||
| 69 | |||
| 70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 71 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 72 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 73 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 74 | |||
| 75 | &sub($count,4,$count); # count-=4 | ||
| 76 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 77 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 78 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 79 | |||
| 80 | &blt($count,&label("finish")); | ||
| 81 | &ld($a0,&QWPw(0,$ap)); | ||
| 82 | &ld($b0,&QWPw(0,$bp)); | ||
| 83 | &br(&label("loop")); | ||
| 84 | EOF | ||
| 85 | ################################################## | ||
| 86 | # Do the remaining words one at a time (entered from "finish" and from its own back-branch) | ||
| 87 | |||
| 88 | &set_label("last_loop"); | ||
| 89 | |||
| 90 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 91 | &mul($a0,$a0,($l0)=&NR(1)); # l0 = &mul(a,a), presumably the low half of a*a | ||
| 92 | &add($ap,$QWS,$ap); # step ap by $QWS (presumably one word) | ||
| 93 | &add($rp,2*$QWS,$rp); # step rp by twice that, hence the -2/-1 offsets below | ||
| 94 | &sub($count,1,$count); | ||
| 95 | &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); # h0 = &muh(a,a), presumably the high half of a*a | ||
| 96 | &st($l0,&QWPw(-2,$rp)); &FR($l0); | ||
| 97 | &st($h0,&QWPw(-1,$rp)); &FR($h0); | ||
| 98 | |||
| 99 | &bgt($count,&label("last_loop")); # repeat while count > 0 | ||
| 100 | &function_end_A($name); | ||
| 101 | |||
| 102 | ###################################################### | ||
| 103 | &set_label("finish"); | ||
| 104 | &add($count,4,$count); # undo the initial count-=4 | ||
| 105 | &bgt($count,&label("last_loop")); # anything to do? | ||
| 106 | |||
| 107 | &set_label("end"); | ||
| 108 | &function_end($name); | ||
| 109 | |||
| 110 | &fin_pool; | ||
| 111 | } | ||
| 112 | |||
| 113 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl new file mode 100644 index 0000000000..bf33f5b503 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub sqr_add_c # emit code for (c2,c1,c0) += a*a; all four arguments are register names | ||
| 5 | { | ||
| 6 | local($a,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | # The carry out of c0 is folded into h1 before h1 is added to c1, so a wrap of c1 always reaches c2. | ||
| 9 | &mul($a,$a,($l1)=&NR(1)); # l1 = low word of a*a | ||
| 10 | &muh($a,$a,($h1)=&NR(1)); # h1 = high word of a*a (at most 2^64-2 for a 64-bit a) | ||
| 11 | &add($c0,$l1,$c0); | ||
| 12 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = carry out of c0 | ||
| 13 | &add($h1,$t1,$h1); &FR($t1); # cannot overflow given the bound on h1 | ||
| 14 | &add($c1,$h1,$c1); | ||
| 15 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = carry out of c1 | ||
| 16 | &add($c2,$t2,$c2); &FR($t2); | ||
| 17 | } | ||
| 18 | |||
| 19 | sub sqr_add_c2 # emit code for (c2,c1,c0) += 2*a*b; all five arguments are register names | ||
| 20 | { | ||
| 21 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 22 | local($l1,$h1,$lc1,$hc1); # the scratch names actually used below, kept from leaking to the caller | ||
| 23 | # Step 1: double the two-word product (h1,l1); the bit shifted out of h1 goes straight into c2. | ||
| 24 | &mul($a,$b,($l1)=&NR(1)); | ||
| 25 | &muh($a,$b,($h1)=&NR(1)); | ||
| 26 | &cmplt($l1,"zero",($lc1)=&NR(1)); # lc1 = top bit of l1 | ||
| 27 | &cmplt($h1,"zero",($hc1)=&NR(1)); # hc1 = top bit of h1 | ||
| 28 | &add($l1,$l1,$l1); | ||
| 29 | &add($h1,$h1,$h1); | ||
| 30 | &add($h1,$lc1,$h1); &FR($lc1); | ||
| 31 | &add($c2,$hc1,$c2); &FR($hc1); | ||
| 32 | # Step 2: add the doubled product into (c1,c0) and collect both carries. | ||
| 33 | &add($c0,$l1,$c0); | ||
| 34 | &add($c1,$h1,$c1); | ||
| 35 | &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); # lc1 = carry out of c0 | ||
| 36 | &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); # hc1 = carry out of c1 | ||
| 37 | # Step 3: adding lc1 to c1 can itself wrap (the doubled h1 may be all ones), so that carry is detected too. | ||
| 38 | &add($c1,$lc1,$c1); &cmpult($c1,$lc1,$lc1); # lc1 = 1 only if c1 wrapped to zero | ||
| 39 | &add($c2,$hc1,$c2); &add($c2,$lc1,$c2); &FR($lc1); &FR($hc1); | ||
| 40 | } | ||
| 41 | |||
| 42 | |||
| 43 | sub bn_sqr_comba4 # generator for the routine $name(rp, ap): squares 4 words at ap into 8 words at rp | ||
| 44 | { | ||
| 45 | local($name)=@_; | ||
| 46 | local(@a,@b,$r,$c0,$c1,$c2); # @b and $r are not used in this sub | ||
| 47 | |||
| 48 | $cnt=1; | ||
| 49 | &init_pool(2); | ||
| 50 | |||
| 51 | $rp=&wparam(0); | ||
| 52 | $ap=&wparam(1); | ||
| 53 | |||
| 54 | &function_begin($name,""); | ||
| 55 | |||
| 56 | &comment(""); | ||
| 57 | # Load a[0..3]; $ap is handed to &FR after the last load (presumably returning it to the register pool). | ||
| 58 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 59 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 60 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 61 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
| 62 | # ($c0,$c1,$c2) is the column accumulator; after each store the names rotate and the new $c2 is zeroed. | ||
| 63 | ($c0,$c1,$c2)=&NR(3); | ||
| 64 | # column 0: a0*a0 | ||
| 65 | &mov("zero",$c2); | ||
| 66 | &mul($a[0],$a[0],$c0); | ||
| 67 | &muh($a[0],$a[0],$c1); | ||
| 68 | &st($c0,&QWPw(0,$rp)); | ||
| 69 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 70 | &mov("zero",$c2); | ||
| 71 | # column 1: 2*a0*a1 | ||
| 72 | &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); | ||
| 73 | &st($c0,&QWPw(1,$rp)); | ||
| 74 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 75 | &mov("zero",$c2); | ||
| 76 | # column 2: a1*a1 + 2*a2*a0 | ||
| 77 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
| 78 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
| 79 | &st($c0,&QWPw(2,$rp)); | ||
| 80 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 81 | &mov("zero",$c2); | ||
| 82 | # column 3: 2*a3*a0 + 2*a2*a1 | ||
| 83 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
| 84 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
| 85 | &st($c0,&QWPw(3,$rp)); | ||
| 86 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 87 | &mov("zero",$c2); | ||
| 88 | # column 4: a2*a2 + 2*a3*a1 | ||
| 89 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
| 90 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
| 91 | &st($c0,&QWPw(4,$rp)); | ||
| 92 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 93 | &mov("zero",$c2); | ||
| 94 | # column 5: 2*a3*a2 | ||
| 95 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
| 96 | &st($c0,&QWPw(5,$rp)); | ||
| 97 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 98 | &mov("zero",$c2); | ||
| 99 | # column 6: a3*a3; the remaining $c1 is stored as word 7 | ||
| 100 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
| 101 | &st($c0,&QWPw(6,$rp)); | ||
| 102 | &st($c1,&QWPw(7,$rp)); | ||
| 103 | |||
| 104 | &function_end($name); | ||
| 105 | |||
| 106 | &fin_pool; | ||
| 107 | } | ||
| 108 | |||
| 109 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl new file mode 100644 index 0000000000..b4afe085f1 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl | |||
| @@ -0,0 +1,132 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_sqr_comba8 # generator for the routine $name(rp, ap): squares 8 words at ap into 16 words at rp | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local(@a,@b,$r,$c0,$c1,$c2); # @b and $r are not used in this sub | ||
| 8 | # Calls &sqr_add_c (adds a*a) and &sqr_add_c2 (adds 2*a*b), which are not defined in this file. | ||
| 9 | $cnt=1; | ||
| 10 | &init_pool(2); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | |||
| 15 | &function_begin($name,""); | ||
| 16 | |||
| 17 | &comment(""); | ||
| 18 | # Load a[0..7]; $ap is handed to &FR after the last load (presumably returning it to the register pool). | ||
| 19 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 20 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 21 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 22 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 23 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); | ||
| 24 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
| 25 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
| 26 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
| 27 | # ($c0,$c1,$c2) is the column accumulator; after each store the names rotate and the new $c2 is zeroed. | ||
| 28 | ($c0,$c1,$c2)=&NR(3); | ||
| 29 | # column 0: a0*a0 | ||
| 30 | &mov("zero",$c2); | ||
| 31 | &mul($a[0],$a[0],$c0); | ||
| 32 | &muh($a[0],$a[0],$c1); | ||
| 33 | &st($c0,&QWPw(0,$rp)); | ||
| 34 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 35 | &mov("zero",$c2); | ||
| 36 | # column 1: 2*a1*a0 | ||
| 37 | &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); | ||
| 38 | &st($c0,&QWPw(1,$rp)); | ||
| 39 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 40 | &mov("zero",$c2); | ||
| 41 | # column 2: a1*a1 + 2*a2*a0 | ||
| 42 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
| 43 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
| 44 | &st($c0,&QWPw(2,$rp)); | ||
| 45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | # column 3: 2*a2*a1 + 2*a3*a0 | ||
| 48 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
| 49 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
| 50 | &st($c0,&QWPw(3,$rp)); | ||
| 51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 52 | &mov("zero",$c2); | ||
| 53 | # column 4: a2*a2 + 2*a3*a1 + 2*a4*a0 | ||
| 54 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
| 55 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
| 56 | &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); | ||
| 57 | &st($c0,&QWPw(4,$rp)); | ||
| 58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 59 | &mov("zero",$c2); | ||
| 60 | # column 5: 2*a3*a2 + 2*a4*a1 + 2*a5*a0 | ||
| 61 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
| 62 | &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); | ||
| 63 | &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); | ||
| 64 | &st($c0,&QWPw(5,$rp)); | ||
| 65 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 66 | &mov("zero",$c2); | ||
| 67 | # column 6: a3*a3 + 2*a4*a2 + 2*a5*a1 + 2*a6*a0 | ||
| 68 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
| 69 | &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); | ||
| 70 | &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); | ||
| 71 | &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); | ||
| 72 | &st($c0,&QWPw(6,$rp)); | ||
| 73 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 74 | &mov("zero",$c2); | ||
| 75 | # column 7: 2*a4*a3 + 2*a5*a2 + 2*a6*a1 + 2*a7*a0 | ||
| 76 | &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); | ||
| 77 | &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); | ||
| 78 | &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); | ||
| 79 | &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); | ||
| 80 | &st($c0,&QWPw(7,$rp)); | ||
| 81 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 82 | &mov("zero",$c2); | ||
| 83 | # column 8: a4*a4 + 2*a5*a3 + 2*a6*a2 + 2*a7*a1 | ||
| 84 | &sqr_add_c($a[4],$c0,$c1,$c2); | ||
| 85 | &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); | ||
| 86 | &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); | ||
| 87 | &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); | ||
| 88 | &st($c0,&QWPw(8,$rp)); | ||
| 89 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 90 | &mov("zero",$c2); | ||
| 91 | # column 9: 2*a5*a4 + 2*a6*a3 + 2*a7*a2 | ||
| 92 | &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); | ||
| 93 | &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); | ||
| 94 | &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); | ||
| 95 | &st($c0,&QWPw(9,$rp)); | ||
| 96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 97 | &mov("zero",$c2); | ||
| 98 | # column 10: a5*a5 + 2*a6*a4 + 2*a7*a3 | ||
| 99 | &sqr_add_c($a[5],$c0,$c1,$c2); | ||
| 100 | &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); | ||
| 101 | &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); | ||
| 102 | &st($c0,&QWPw(10,$rp)); | ||
| 103 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 104 | &mov("zero",$c2); | ||
| 105 | # column 11: 2*a6*a5 + 2*a7*a4 | ||
| 106 | &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); | ||
| 107 | &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); | ||
| 108 | &st($c0,&QWPw(11,$rp)); | ||
| 109 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 110 | &mov("zero",$c2); | ||
| 111 | # column 12: a6*a6 + 2*a7*a5 | ||
| 112 | &sqr_add_c($a[6],$c0,$c1,$c2); | ||
| 113 | &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); | ||
| 114 | &st($c0,&QWPw(12,$rp)); | ||
| 115 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 116 | &mov("zero",$c2); | ||
| 117 | # column 13: 2*a7*a6 | ||
| 118 | &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); | ||
| 119 | &st($c0,&QWPw(13,$rp)); | ||
| 120 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 121 | &mov("zero",$c2); | ||
| 122 | # column 14: a7*a7; the remaining $c1 is stored as word 15 | ||
| 123 | &sqr_add_c($a[7],$c0,$c1,$c2); | ||
| 124 | &st($c0,&QWPw(14,$rp)); | ||
| 125 | &st($c1,&QWPw(15,$rp)); | ||
| 126 | |||
| 127 | &function_end($name); | ||
| 128 | |||
| 129 | &fin_pool; | ||
| 130 | } | ||
| 131 | |||
| 132 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sub.pl b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl new file mode 100644 index 0000000000..d998da5c21 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_sub_words # generator for the routine $name(rp, ap, bp, count): rp[i] = ap[i] - bp[i] with borrow kept in $cc | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); # $cc is r0 and holds the running borrow (presumably also the routine's return value) | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | $count=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); # count is kept biased by -4 while the unrolled loop runs | ||
| 21 | &mov("zero",$cc); # no borrow yet | ||
| 22 | &blt($count,&label("finish")); # fewer than 4 words: skip the unrolled loop | ||
| 23 | |||
| 24 | ($a0,$b0)=&NR(2); | ||
| 25 | &ld($a0,&QWPw(0,$ap)); | ||
| 26 | &ld($b0,&QWPw(0,$bp)); | ||
| 27 | |||
| 28 | ########################################################## | ||
| 29 | &set_label("loop"); | ||
| 30 | # Four words per pass; loads and stores are interleaved with the arithmetic of neighbouring words. | ||
| 31 | ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); | ||
| 32 | &ld($a1,&QWPw(1,$ap)); | ||
| 33 | &cmpult($a0,$b0,$tmp); # will a0-b0 borrow? | ||
| 34 | &ld($b1,&QWPw(1,$bp)); | ||
| 35 | &sub($a0,$b0,$a0); # do the subtract | ||
| 36 | &ld($a2,&QWPw(2,$ap)); | ||
| 37 | &cmpult($a0,$cc,$b0); # will subtracting the incoming borrow itself borrow? | ||
| 38 | &ld($b2,&QWPw(2,$bp)); | ||
| 39 | &sub($a0,$cc,$o0); # subtract the incoming borrow; o0 is result word 0 | ||
| 40 | &ld($a3,&QWPw(3,$ap)); | ||
| 41 | &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); # new borrow = sum of the two borrow flags | ||
| 42 | |||
| 43 | &cmpult($a1,$b1,$t1); # will a1-b1 borrow? | ||
| 44 | &sub($a1,$b1,$a1); # do the subtract | ||
| 45 | &ld($b3,&QWPw(3,$bp)); | ||
| 46 | &cmpult($a1,$cc,$b1); # will subtracting the incoming borrow itself borrow? | ||
| 47 | &sub($a1,$cc,$o1); # subtract the incoming borrow; o1 is result word 1 | ||
| 48 | &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); | ||
| 49 | |||
| 50 | &cmpult($a2,$b2,$tmp); # will a2-b2 borrow? | ||
| 51 | &sub($a2,$b2,$a2); # do the subtract | ||
| 52 | &st($o0,&QWPw(0,$rp)); &FR($o0); # save | ||
| 53 | &cmpult($a2,$cc,$b2); # will subtracting the incoming borrow itself borrow? | ||
| 54 | &sub($a2,$cc,$o2); # subtract the incoming borrow; o2 is result word 2 | ||
| 55 | &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); | ||
| 56 | |||
| 57 | &cmpult($a3,$b3,$t3); # will a3-b3 borrow? | ||
| 58 | &sub($a3,$b3,$a3); # do the subtract | ||
| 59 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
| 60 | &cmpult($a3,$cc,$b3); # will subtracting the incoming borrow itself borrow? | ||
| 61 | &sub($a3,$cc,$o3); # subtract the incoming borrow; o3 is result word 3 | ||
| 62 | &add($b3,$t3,$cc); &FR($t3,$a3,$b3); | ||
| 63 | |||
| 64 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
| 65 | &sub($count,4,$count); # count-=4 | ||
| 66 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
| 67 | &add($ap,4*$QWS,$ap); # ap advances 4 words | ||
| 68 | &add($bp,4*$QWS,$bp); # bp advances 4 words | ||
| 69 | &add($rp,4*$QWS,$rp); # rp advances 4 words | ||
| 70 | |||
| 71 | &blt($count,&label("finish")); # fewer than 4 words left | ||
| 72 | &ld($a0,&QWPw(0,$ap)); | ||
| 73 | &ld($b0,&QWPw(0,$bp)); | ||
| 74 | &br(&label("loop")); | ||
| 75 | ################################################## | ||
| 76 | # Do the last 0..3 words | ||
| 77 | |||
| 78 | &set_label("last_loop"); | ||
| 79 | |||
| 80 | &ld($a0,&QWPw(0,$ap)); # get a | ||
| 81 | &ld($b0,&QWPw(0,$bp)); # get b | ||
| 82 | &cmpult($a0,$b0,$tmp); # will a-b borrow? | ||
| 83 | &sub($a0,$b0,$a0); # do the subtract | ||
| 84 | &cmpult($a0,$cc,$b0); # will subtracting the incoming borrow itself borrow? | ||
| 85 | &sub($a0,$cc,$a0); # subtract the incoming borrow | ||
| 86 | &st($a0,&QWPw(0,$rp)); # save | ||
| 87 | &add($b0,$tmp,$cc); # add the borrows | ||
| 88 | |||
| 89 | &add($ap,$QWS,$ap); | ||
| 90 | &add($bp,$QWS,$bp); | ||
| 91 | &add($rp,$QWS,$rp); | ||
| 92 | &sub($count,1,$count); | ||
| 93 | &bgt($count,&label("last_loop")); # repeat while count > 0 | ||
| 94 | &function_end_A($name); | ||
| 95 | |||
| 96 | ###################################################### | ||
| 97 | &set_label("finish"); | ||
| 98 | &add($count,4,$count); # undo the -4 bias | ||
| 99 | &bgt($count,&label("last_loop")); # 1..3 words remain | ||
| 100 | |||
| 101 | &FR($a0,$b0); | ||
| 102 | &set_label("end"); | ||
| 103 | &function_end($name); | ||
| 104 | |||
| 105 | &fin_pool; | ||
| 106 | } | ||
| 107 | |||
| 108 | 1; | ||
