diff options
| author | beck <> | 1999-09-29 05:53:45 +0000 |
|---|---|---|
| committer | beck <> | 1999-09-29 05:53:45 +0000 |
| commit | 648e4f0876a3773381cbfff3192dd84dd1c8c925 (patch) | |
| tree | bd9d01e3969ffa5aac92128af3e515520c88fc0e /src/lib/libcrypto/bn | |
| parent | 756086c41b0487beefc3d5b3400f80095d0e4157 (diff) | |
| download | openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.tar.gz openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.tar.bz2 openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.zip | |
new files for OpenSSL 0.9.4
Diffstat (limited to 'src/lib/libcrypto/bn')
41 files changed, 5507 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/add.pl b/src/lib/libcrypto/bn/asm/alpha.works/add.pl new file mode 100644 index 0000000000..4dc76e6b69 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/add.pl | |||
| @@ -0,0 +1,119 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_add_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r); | ||
| 8 | # Emits the routine $name: rp[i]=ap[i]+bp[i] over count words, with the running carry kept in $cc | ||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | # arguments in order: result pointer, the two source pointers, word count | ||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | $count=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); # bias count by -4: negative means fewer than 4 words remain | ||
| 21 | &mov("zero",$cc); # carry=0 | ||
| 22 | &br(&label("finish")); # NOTE(review): unconditional, so the blt below and the unrolled loop are never reached -- looks like a deliberate bypass, confirm | ||
| 23 | &blt($count,&label("finish")); | ||
| 24 | |||
| 25 | ($a0,$b0)=&NR(2); | ||
| 26 | &ld($a0,&QWPw(0,$ap)); | ||
| 27 | &ld($b0,&QWPw(0,$bp)); | ||
| 28 | |||
| 29 | ########################################################## | ||
| 30 | &set_label("loop"); | ||
| 31 | # one pass handles words 0..3; word 0 of each operand is already loaded on entry | ||
| 32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 38 | |||
| 39 | ($o0,$t0)=&NR(2); | ||
| 40 | &add($a0,$b0,$o0); # o0=a0+b0 | ||
| 41 | &cmpult($o0,$b0,$t0); # t0=1 if that sum wrapped | ||
| 42 | &add($o0,$cc,$o0); # add the incoming carry | ||
| 43 | &cmpult($o0,$cc,$cc); # cc=1 if adding the carry wrapped | ||
| 44 | &add($cc,$t0,$cc); &FR($t0); # outgoing carry | ||
| 45 | |||
| 46 | ($t1,$o1)=&NR(2); | ||
| 47 | |||
| 48 | &add($a1,$b1,$o1); &FR($a1); | ||
| 49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 50 | &add($o1,$cc,$o1); | ||
| 51 | &cmpult($o1,$cc,$cc); | ||
| 52 | &add($cc,$t1,$cc); &FR($t1); | ||
| 53 | |||
| 54 | ($t2,$o2)=&NR(2); | ||
| 55 | |||
| 56 | &add($a2,$b2,$o2); &FR($a2); | ||
| 57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 58 | &add($o2,$cc,$o2); | ||
| 59 | &cmpult($o2,$cc,$cc); | ||
| 60 | &add($cc,$t2,$cc); &FR($t2); | ||
| 61 | |||
| 62 | ($t3,$o3)=&NR(2); | ||
| 63 | |||
| 64 | &add($a3,$b3,$o3); &FR($a3); | ||
| 65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 66 | &add($o3,$cc,$o3); | ||
| 67 | &cmpult($o3,$cc,$cc); | ||
| 68 | &add($cc,$t3,$cc); &FR($t3); | ||
| 69 | # each sum goes to its own word of rp (previously all four were written to word 0) | ||
| 70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 71 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
| 72 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
| 73 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
| 74 | |||
| 75 | &sub($count,4,$count); # count-=4 | ||
| 76 | &add($ap,4*$QWS,$ap); # ap+=4 words | ||
| 77 | &add($bp,4*$QWS,$bp); # bp+=4 words | ||
| 78 | &add($rp,4*$QWS,$rp); # rp+=4 words | ||
| 79 | |||
| 80 | &blt($count,&label("finish")); | ||
| 81 | &ld($a0,&QWPw(0,$ap)); | ||
| 82 | &ld($b0,&QWPw(0,$bp)); | ||
| 83 | &br(&label("loop")); | ||
| 84 | ################################################## | ||
| 85 | # Do the last 0..3 words | ||
| 86 | |||
| 87 | ($t0,$o0)=&NR(2); | ||
| 88 | &set_label("last_loop"); | ||
| 89 | |||
| 90 | &ld($a0,&QWPw(0,$ap)); # get a | ||
| 91 | &ld($b0,&QWPw(0,$bp)); # get b | ||
| 92 | |||
| 93 | &add($a0,$b0,$o0); | ||
| 94 | &cmpult($o0,$b0,$t0); # did a+b carry? | ||
| 95 | &add($o0,$cc,$o0); # add the incoming carry | ||
| 96 | &cmpult($o0,$cc,$cc); # did adding the carry wrap? | ||
| 97 | &add($cc,$t0,$cc); # combine the two carries | ||
| 98 | &st($o0,&QWPw(0,$rp)); # save | ||
| 99 | |||
| 100 | &add($ap,$QWS,$ap); | ||
| 101 | &add($bp,$QWS,$bp); | ||
| 102 | &add($rp,$QWS,$rp); | ||
| 103 | &sub($count,1,$count); | ||
| 104 | &bgt($count,&label("last_loop")); | ||
| 105 | &function_end_A($name); | ||
| 106 | |||
| 107 | ###################################################### | ||
| 108 | &set_label("finish"); | ||
| 109 | &add($count,4,$count); # undo the -4 bias applied at the top | ||
| 110 | &bgt($count,&label("last_loop")); | ||
| 111 | |||
| 112 | &FR($o0,$t0,$a0,$b0); | ||
| 113 | &set_label("end"); | ||
| 114 | &function_end($name); | ||
| 115 | |||
| 116 | &fin_pool; | ||
| 117 | } | ||
| 118 | |||
| 119 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/div.pl b/src/lib/libcrypto/bn/asm/alpha.works/div.pl new file mode 100644 index 0000000000..7ec144377f --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/div.pl | |||
| @@ -0,0 +1,144 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | |||
| 3 | sub bn_div64 | ||
| 4 | { # stores a fixed Alpha listing for bn_div64 in $data (kept verbatim) and hands it to &asm_add | ||
| 5 | local($data)=<<'EOF'; | ||
| 6 | # | ||
| 7 | # What follows was taken directly from the C compiler with a few | ||
| 8 | # hacks to redo the lables. | ||
| 9 | # | ||
| 10 | .text | ||
| 11 | .set noreorder | ||
| 12 | .set volatile | ||
| 13 | .align 3 | ||
| 14 | .globl bn_div64 | ||
| 15 | .ent bn_div64 | ||
| 16 | bn_div64: | ||
| 17 | ldgp $29,0($27) | ||
| 18 | bn_div64..ng: | ||
| 19 | lda $30,-48($30) | ||
| 20 | .frame $30,48,$26,0 | ||
| 21 | stq $26,0($30) | ||
| 22 | stq $9,8($30) | ||
| 23 | stq $10,16($30) | ||
| 24 | stq $11,24($30) | ||
| 25 | stq $12,32($30) | ||
| 26 | stq $13,40($30) | ||
| 27 | .mask 0x4003e00,-48 | ||
| 28 | .prologue 1 | ||
| 29 | bis $16,$16,$9 | ||
| 30 | bis $17,$17,$10 | ||
| 31 | bis $18,$18,$11 | ||
| 32 | bis $31,$31,$13 | ||
| 33 | bis $31,2,$12 | ||
| 34 | bne $11,$9119 | ||
| 35 | lda $0,-1 | ||
| 36 | br $31,$9136 | ||
| 37 | .align 4 | ||
| 38 | $9119: | ||
| 39 | bis $11,$11,$16 | ||
| 40 | jsr $26,BN_num_bits_word | ||
| 41 | ldgp $29,0($26) | ||
| 42 | subq $0,64,$1 | ||
| 43 | beq $1,$9120 | ||
| 44 | bis $31,1,$1 | ||
| 45 | sll $1,$0,$1 | ||
| 46 | cmpule $9,$1,$1 | ||
| 47 | bne $1,$9120 | ||
| 48 | # lda $16,_IO_stderr_ | ||
| 49 | # lda $17,$C32 | ||
| 50 | # bis $0,$0,$18 | ||
| 51 | # jsr $26,fprintf | ||
| 52 | # ldgp $29,0($26) | ||
| 53 | jsr $26,abort | ||
| 54 | ldgp $29,0($26) | ||
| 55 | .align 4 | ||
| 56 | $9120: | ||
| 57 | bis $31,64,$3 | ||
| 58 | cmpult $9,$11,$2 | ||
| 59 | subq $3,$0,$1 | ||
| 60 | addl $1,$31,$0 | ||
| 61 | subq $9,$11,$1 | ||
| 62 | cmoveq $2,$1,$9 | ||
| 63 | beq $0,$9122 | ||
| 64 | zapnot $0,15,$2 | ||
| 65 | subq $3,$0,$1 | ||
| 66 | sll $11,$2,$11 | ||
| 67 | sll $9,$2,$3 | ||
| 68 | srl $10,$1,$1 | ||
| 69 | sll $10,$2,$10 | ||
| 70 | bis $3,$1,$9 | ||
| 71 | $9122: | ||
| 72 | srl $11,32,$5 | ||
| 73 | zapnot $11,15,$6 | ||
| 74 | lda $7,-1 | ||
| 75 | .align 5 | ||
| 76 | $9123: | ||
| 77 | srl $9,32,$1 | ||
| 78 | subq $1,$5,$1 | ||
| 79 | bne $1,$9126 | ||
| 80 | zapnot $7,15,$27 | ||
| 81 | br $31,$9127 | ||
| 82 | .align 4 | ||
| 83 | $9126: | ||
| 84 | bis $9,$9,$24 | ||
| 85 | bis $5,$5,$25 | ||
| 86 | divqu $24,$25,$27 | ||
| 87 | $9127: | ||
| 88 | srl $10,32,$4 | ||
| 89 | .align 5 | ||
| 90 | $9128: | ||
| 91 | mulq $27,$5,$1 | ||
| 92 | subq $9,$1,$3 | ||
| 93 | zapnot $3,240,$1 | ||
| 94 | bne $1,$9129 | ||
| 95 | mulq $6,$27,$2 | ||
| 96 | sll $3,32,$1 | ||
| 97 | addq $1,$4,$1 | ||
| 98 | cmpule $2,$1,$2 | ||
| 99 | bne $2,$9129 | ||
| 100 | subq $27,1,$27 | ||
| 101 | br $31,$9128 | ||
| 102 | .align 4 | ||
| 103 | $9129: | ||
| 104 | mulq $27,$6,$1 | ||
| 105 | mulq $27,$5,$4 | ||
| 106 | srl $1,32,$3 | ||
| 107 | sll $1,32,$1 | ||
| 108 | addq $4,$3,$4 | ||
| 109 | cmpult $10,$1,$2 | ||
| 110 | subq $10,$1,$10 | ||
| 111 | addq $2,$4,$2 | ||
| 112 | cmpult $9,$2,$1 | ||
| 113 | bis $2,$2,$4 | ||
| 114 | beq $1,$9134 | ||
| 115 | addq $9,$11,$9 | ||
| 116 | subq $27,1,$27 | ||
| 117 | $9134: | ||
| 118 | subl $12,1,$12 | ||
| 119 | subq $9,$4,$9 | ||
| 120 | beq $12,$9124 | ||
| 121 | sll $27,32,$13 | ||
| 122 | sll $9,32,$2 | ||
| 123 | srl $10,32,$1 | ||
| 124 | sll $10,32,$10 | ||
| 125 | bis $2,$1,$9 | ||
| 126 | br $31,$9123 | ||
| 127 | .align 4 | ||
| 128 | $9124: | ||
| 129 | bis $13,$27,$0 | ||
| 130 | $9136: | ||
| 131 | ldq $26,0($30) | ||
| 132 | ldq $9,8($30) | ||
| 133 | ldq $10,16($30) | ||
| 134 | ldq $11,24($30) | ||
| 135 | ldq $12,32($30) | ||
| 136 | ldq $13,40($30) | ||
| 137 | addq $30,48,$30 | ||
| 138 | ret $31,($26),1 | ||
| 139 | .end bn_div64 | ||
| 140 | EOF | ||
| 141 | &asm_add($data); # the single-quoted here-doc is passed on without interpolation | ||
| 142 | } | ||
| 143 | |||
| 144 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl new file mode 100644 index 0000000000..b182bae452 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_mul_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); # NOTE(review): $couny looks like a typo for $count -- confirm | ||
| 8 | # Emits the routine $name: per word, rp[i] = &mul word of ap[i]*word plus the carry in $cc; the &muh word feeds the next carry | ||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | # arguments in order: result pointer, source pointer, word count, multiplier word | ||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | $word=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); # bias count by -4; the "finish" code adds it back | ||
| 21 | &mov("zero",$cc); # carry=0 | ||
| 22 | &br(&label("finish")); # NOTE(review): unconditional, so the blt and the two loads below are never reached -- confirm intended | ||
| 23 | &blt($count,&label("finish")); | ||
| 24 | |||
| 25 | ($a0,$r0)=&NR(2); | ||
| 26 | &ld($a0,&QWPw(0,$ap)); | ||
| 27 | &ld($r0,&QWPw(0,$rp)); | ||
| 28 | # everything up to the EOF marker is only stored as text in $a and never executed; it refers to $bp and $b0, which this routine does not set | ||
| 29 | $a=<<'EOF'; | ||
| 30 | ########################################################## | ||
| 31 | &set_label("loop"); | ||
| 32 | |||
| 33 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 34 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 35 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 36 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 37 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 38 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 39 | |||
| 40 | ($o0,$t0)=&NR(2); | ||
| 41 | &add($a0,$b0,$o0); | ||
| 42 | &cmpult($o0,$b0,$t0); | ||
| 43 | &add($o0,$cc,$o0); | ||
| 44 | &cmpult($o0,$cc,$cc); | ||
| 45 | &add($cc,$t0,$cc); &FR($t0); | ||
| 46 | |||
| 47 | ($t1,$o1)=&NR(2); | ||
| 48 | |||
| 49 | &add($a1,$b1,$o1); &FR($a1); | ||
| 50 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 51 | &add($o1,$cc,$o1); | ||
| 52 | &cmpult($o1,$cc,$cc); | ||
| 53 | &add($cc,$t1,$cc); &FR($t1); | ||
| 54 | |||
| 55 | ($t2,$o2)=&NR(2); | ||
| 56 | |||
| 57 | &add($a2,$b2,$o2); &FR($a2); | ||
| 58 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 59 | &add($o2,$cc,$o2); | ||
| 60 | &cmpult($o2,$cc,$cc); | ||
| 61 | &add($cc,$t2,$cc); &FR($t2); | ||
| 62 | |||
| 63 | ($t3,$o3)=&NR(2); | ||
| 64 | |||
| 65 | &add($a3,$b3,$o3); &FR($a3); | ||
| 66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 67 | &add($o3,$cc,$o3); | ||
| 68 | &cmpult($o3,$cc,$cc); | ||
| 69 | &add($cc,$t3,$cc); &FR($t3); | ||
| 70 | |||
| 71 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 72 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 73 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 74 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 75 | |||
| 76 | &sub($count,4,$count); # count-=4 | ||
| 77 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 78 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 79 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 80 | |||
| 81 | &blt($count,&label("finish")); | ||
| 82 | &ld($a0,&QWPw(0,$ap)); | ||
| 83 | &ld($b0,&QWPw(0,$bp)); | ||
| 84 | &br(&label("loop")); | ||
| 85 | EOF | ||
| 86 | ################################################## | ||
| 87 | # Do the last 0..3 words | ||
| 88 | |||
| 89 | &set_label("last_loop"); | ||
| 90 | |||
| 91 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 92 | &mul($a0,$word,($l0)=&NR(1)); # l0 = &mul word of a*word | ||
| 93 | &add($ap,$QWS,$ap); # advance ap to the next word | ||
| 94 | &muh($a0,$word,($h0)=&NR(1)); &FR($a0); # h0 = &muh word of a*word | ||
| 95 | &add($l0,$cc,$l0); # add the incoming carry | ||
| 96 | &add($rp,$QWS,$rp); # advance rp to the next word | ||
| 97 | &sub($count,1,$count); # one word done | ||
| 98 | &cmpult($l0,$cc,$cc); # cc=1 if adding the carry wrapped | ||
| 99 | &st($l0,&QWPw(-1,$rp)); &FR($l0); # rp was already advanced, hence index -1 | ||
| 100 | &add($h0,$cc,$cc); &FR($h0); # carry for the next word | ||
| 101 | |||
| 102 | &bgt($count,&label("last_loop")); | ||
| 103 | &function_end_A($name); | ||
| 104 | |||
| 105 | ###################################################### | ||
| 106 | &set_label("finish"); | ||
| 107 | &add($count,4,$count); # undo the -4 bias applied at the top | ||
| 108 | &bgt($count,&label("last_loop")); | ||
| 109 | |||
| 110 | &set_label("end"); | ||
| 111 | &function_end($name); | ||
| 112 | |||
| 113 | &fin_pool; | ||
| 114 | } | ||
| 115 | |||
| 116 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl new file mode 100644 index 0000000000..e37f6315fb --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_mul_add_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); # NOTE(review): $couny looks like a typo for $count -- confirm | ||
| 8 | # Emits the routine $name: per word, rp[i] += &mul word of ap[i]*word plus the carry in $cc; the &muh word and both wrap flags form the next carry | ||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | # arguments in order: result pointer, source pointer, word count, multiplier word | ||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | $word=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); # bias count by -4; the "finish" code adds it back | ||
| 21 | &mov("zero",$cc); # carry=0 | ||
| 22 | &br(&label("finish")); # NOTE(review): unconditional, so the blt and the two loads below are never reached -- confirm intended | ||
| 23 | &blt($count,&label("finish")); | ||
| 24 | |||
| 25 | ($a0,$r0)=&NR(2); | ||
| 26 | &ld($a0,&QWPw(0,$ap)); | ||
| 27 | &ld($r0,&QWPw(0,$rp)); | ||
| 28 | # everything up to the EOF marker is only stored as text in $a and never executed; it refers to $bp and $b0, which this routine does not set | ||
| 29 | $a=<<'EOF'; | ||
| 30 | ########################################################## | ||
| 31 | &set_label("loop"); | ||
| 32 | |||
| 33 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 34 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 35 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 36 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 37 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 38 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 39 | |||
| 40 | ($o0,$t0)=&NR(2); | ||
| 41 | &add($a0,$b0,$o0); | ||
| 42 | &cmpult($o0,$b0,$t0); | ||
| 43 | &add($o0,$cc,$o0); | ||
| 44 | &cmpult($o0,$cc,$cc); | ||
| 45 | &add($cc,$t0,$cc); &FR($t0); | ||
| 46 | |||
| 47 | ($t1,$o1)=&NR(2); | ||
| 48 | |||
| 49 | &add($a1,$b1,$o1); &FR($a1); | ||
| 50 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 51 | &add($o1,$cc,$o1); | ||
| 52 | &cmpult($o1,$cc,$cc); | ||
| 53 | &add($cc,$t1,$cc); &FR($t1); | ||
| 54 | |||
| 55 | ($t2,$o2)=&NR(2); | ||
| 56 | |||
| 57 | &add($a2,$b2,$o2); &FR($a2); | ||
| 58 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 59 | &add($o2,$cc,$o2); | ||
| 60 | &cmpult($o2,$cc,$cc); | ||
| 61 | &add($cc,$t2,$cc); &FR($t2); | ||
| 62 | |||
| 63 | ($t3,$o3)=&NR(2); | ||
| 64 | |||
| 65 | &add($a3,$b3,$o3); &FR($a3); | ||
| 66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 67 | &add($o3,$cc,$o3); | ||
| 68 | &cmpult($o3,$cc,$cc); | ||
| 69 | &add($cc,$t3,$cc); &FR($t3); | ||
| 70 | |||
| 71 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 72 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 73 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 74 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 75 | |||
| 76 | &sub($count,4,$count); # count-=4 | ||
| 77 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 78 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 79 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 80 | |||
| 81 | &blt($count,&label("finish")); | ||
| 82 | &ld($a0,&QWPw(0,$ap)); | ||
| 83 | &ld($b0,&QWPw(0,$bp)); | ||
| 84 | &br(&label("loop")); | ||
| 85 | EOF | ||
| 86 | ################################################## | ||
| 87 | # Do the last 0..3 words | ||
| 88 | |||
| 89 | &set_label("last_loop"); | ||
| 90 | |||
| 91 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 92 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get the current result word r | ||
| 93 | &mul($a0,$word,($l0)=&NR(1)); # l0 = &mul word of a*word | ||
| 94 | &sub($count,1,$count); # one word done | ||
| 95 | &add($ap,$QWS,$ap); # advance ap to the next word | ||
| 96 | &muh($a0,$word,($h0)=&NR(1)); &FR($a0); # h0 = &muh word of a*word | ||
| 97 | &add($r0,$l0,$r0); # r += l0 | ||
| 98 | &add($rp,$QWS,$rp); # advance rp to the next word | ||
| 99 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); # t0=1 if r+l0 wrapped | ||
| 100 | &add($r0,$cc,$r0); # add the incoming carry | ||
| 101 | &add($h0,$t0,$h0); &FR($t0); # fold that wrap into h0 | ||
| 102 | &cmpult($r0,$cc,$cc); # cc=1 if adding the carry wrapped | ||
| 103 | &st($r0,&QWPw(-1,$rp)); &FR($r0); # rp was already advanced, hence index -1 | ||
| 104 | &add($h0,$cc,$cc); &FR($h0); # carry for the next word | ||
| 105 | |||
| 106 | &bgt($count,&label("last_loop")); | ||
| 107 | &function_end_A($name); | ||
| 108 | |||
| 109 | ###################################################### | ||
| 110 | &set_label("finish"); | ||
| 111 | &add($count,4,$count); # undo the -4 bias applied at the top | ||
| 112 | &bgt($count,&label("last_loop")); | ||
| 113 | |||
| 114 | &set_label("end"); | ||
| 115 | &function_end($name); | ||
| 116 | |||
| 117 | &fin_pool; | ||
| 118 | } | ||
| 119 | |||
| 120 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl new file mode 100644 index 0000000000..5efd201281 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl | |||
| @@ -0,0 +1,213 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub mul_add_c | ||
| 5 | { | ||
| 6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | # adds the two-word product a*b into the accumulator (c2,c1,c0); scratch registers come from &NR and are returned with &FR | ||
| 9 | ($l1)=&NR(1); &mul($a,$b,$l1); | ||
| 10 | ($h1)=&NR(1); &muh($a,$b,$h1); | ||
| 11 | &add($c0,$l1,$c0); # fold the &mul word into c0 | ||
| 12 | ($t1)=&NR(1); &cmpult($c0,$l1,$t1); &FR($l1); # t1 flags a wrap of c0 | ||
| 13 | &add($t1,$h1,$h1); &FR($t1); # carry joins the &muh word | ||
| 14 | &add($c1,$h1,$c1); # fold that into c1 | ||
| 15 | ($t2)=&NR(1); &cmpult($c1,$h1,$t2); &FR($h1); # t2 flags a wrap of c1 | ||
| 16 | &add($c2,$t2,$c2); &FR($t2); # and is counted in c2 | ||
| 17 | } | ||
| 18 | |||
| 19 | sub bn_mul_comba4 | ||
| 20 | { | ||
| 21 | local($name)=@_; | ||
| 22 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 23 | # Emits the routine $name: the 8-word product r[0..7] of the 4-word inputs a[] and b[], built one result column at a time | ||
| 24 | $cnt=1; | ||
| 25 | &init_pool(3); | ||
| 26 | # arguments in order: result pointer, the two source pointers | ||
| 27 | $rp=&wparam(0); | ||
| 28 | $ap=&wparam(1); | ||
| 29 | $bp=&wparam(2); | ||
| 30 | |||
| 31 | &function_begin($name,""); | ||
| 32 | |||
| 33 | &comment(""); | ||
| 34 | # operand loads are interleaved with the first multiplies | ||
| 35 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 36 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 37 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 38 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 39 | &mul($a[0],$b[0],($r00)=&NR(1)); | ||
| 40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 42 | &muh($a[0],$b[0],($r01)=&NR(1)); | ||
| 43 | &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); # ap is released just before its last use | ||
| 44 | &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); # likewise bp | ||
| 45 | &mul($a[0],$b[1],($r02)=&NR(1)); | ||
| 46 | |||
| 47 | ($R,$H1,$H2)=&NR(3); | ||
| 48 | # $R accumulates the current column; $H1 and $H2 collect its carry bits | ||
| 49 | &st($r00,&QWPw(0,$rp)); &FR($r00); | ||
| 50 | |||
| 51 | &mov("zero",$R); | ||
| 52 | &mul($a[1],$b[0],($r03)=&NR(1)); | ||
| 53 | # column 1: &muh word of a0*b0 plus the &mul words of a0*b1 and a1*b0 | ||
| 54 | &mov("zero",$H1); | ||
| 55 | &mov("zero",$H2); # was $H0, which is never allocated; $H2 must start at zero | ||
| 56 | &add($R,$r01,$R); | ||
| 57 | &muh($a[0],$b[1],($r04)=&NR(1)); | ||
| 58 | &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); | ||
| 59 | &add($R,$r02,$R); | ||
| 60 | &add($H1,$t01,$H1); &FR($t01); | ||
| 61 | &muh($a[1],$b[0],($r05)=&NR(1)); | ||
| 62 | &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); | ||
| 63 | &add($R,$r03,$R); | ||
| 64 | &add($H2,$t02,$H2); &FR($t02); | ||
| 65 | &mul($a[0],$b[2],($r06)=&NR(1)); | ||
| 66 | &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); | ||
| 67 | &add($H1,$t03,$H1); &FR($t03); | ||
| 68 | &st($R,&QWPw(1,$rp)); | ||
| 69 | &add($H1,$H2,$R); | ||
| 70 | # column 2: &muh words of a0*b1, a1*b0 plus &mul words of a0*b2, a1*b1, a2*b0 | ||
| 71 | &mov("zero",$H1); | ||
| 72 | &add($R,$r04,$R); | ||
| 73 | &mov("zero",$H2); | ||
| 74 | &mul($a[1],$b[1],($r07)=&NR(1)); | ||
| 75 | &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); | ||
| 76 | &add($R,$r05,$R); | ||
| 77 | &add($H1,$t04,$H1); &FR($t04); | ||
| 78 | &mul($a[2],$b[0],($r08)=&NR(1)); | ||
| 79 | &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); | ||
| 80 | &add($R,$r06,$R); # was $r01 (already released); $r06 is the word the next cmpult checks | ||
| 81 | &add($H2,$t05,$H2); &FR($t05); | ||
| 82 | &muh($a[0],$b[2],($r09)=&NR(1)); | ||
| 83 | &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); | ||
| 84 | &add($R,$r07,$R); | ||
| 85 | &add($H1,$t06,$H1); &FR($t06); | ||
| 86 | &muh($a[1],$b[1],($r10)=&NR(1)); | ||
| 87 | &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); | ||
| 88 | &add($R,$r08,$R); | ||
| 89 | &add($H2,$t07,$H2); &FR($t07); | ||
| 90 | &muh($a[2],$b[0],($r11)=&NR(1)); | ||
| 91 | &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); | ||
| 92 | &add($H1,$t08,$H1); &FR($t08); | ||
| 93 | &st($R,&QWPw(2,$rp)); | ||
| 94 | &add($H1,$H2,$R); | ||
| 95 | # column 3: &muh words of a0*b2, a1*b1, a2*b0 plus &mul words of a0*b3, a1*b2, a2*b1, a3*b0 | ||
| 96 | &mov("zero",$H1); | ||
| 97 | &add($R,$r09,$R); | ||
| 98 | &mov("zero",$H2); | ||
| 99 | &mul($a[0],$b[3],($r12)=&NR(1)); | ||
| 100 | &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); | ||
| 101 | &add($R,$r10,$R); | ||
| 102 | &add($H1,$t09,$H1); &FR($t09); | ||
| 103 | &mul($a[1],$b[2],($r13)=&NR(1)); | ||
| 104 | &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); | ||
| 105 | &add($R,$r11,$R); | ||
| 106 | &add($H1,$t10,$H1); &FR($t10); | ||
| 107 | &mul($a[2],$b[1],($r14)=&NR(1)); | ||
| 108 | &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); | ||
| 109 | &add($R,$r12,$R); | ||
| 110 | &add($H1,$t11,$H1); &FR($t11); | ||
| 111 | &mul($a[3],$b[0],($r15)=&NR(1)); | ||
| 112 | &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); | ||
| 113 | &add($R,$r13,$R); | ||
| 114 | &add($H1,$t12,$H1); &FR($t12); | ||
| 115 | &muh($a[0],$b[3],($r16)=&NR(1)); | ||
| 116 | &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); | ||
| 117 | &add($R,$r14,$R); | ||
| 118 | &add($H1,$t13,$H1); &FR($t13); | ||
| 119 | &muh($a[1],$b[2],($r17)=&NR(1)); | ||
| 120 | &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); | ||
| 121 | &add($R,$r15,$R); | ||
| 122 | &add($H1,$t14,$H1); &FR($t14); | ||
| 123 | &muh($a[2],$b[1],($r18)=&NR(1)); | ||
| 124 | &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); | ||
| 125 | &add($H1,$t15,$H1); &FR($t15); | ||
| 126 | &st($R,&QWPw(3,$rp)); | ||
| 127 | &add($H1,$H2,$R); | ||
| 128 | # column 4: &muh words of a0*b3, a1*b2, a2*b1, a3*b0 plus &mul words of a1*b3, a2*b2, a3*b1 | ||
| 129 | &mov("zero",$H1); | ||
| 130 | &add($R,$r16,$R); | ||
| 131 | &mov("zero",$H2); | ||
| 132 | &muh($a[3],$b[0],($r19)=&NR(1)); | ||
| 133 | &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); | ||
| 134 | &add($R,$r17,$R); | ||
| 135 | &add($H1,$t16,$H1); &FR($t16); | ||
| 136 | &mul($a[1],$b[3],($r20)=&NR(1)); | ||
| 137 | &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); | ||
| 138 | &add($R,$r18,$R); | ||
| 139 | &add($H1,$t17,$H1); &FR($t17); | ||
| 140 | &mul($a[2],$b[2],($r21)=&NR(1)); | ||
| 141 | &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); | ||
| 142 | &add($R,$r19,$R); | ||
| 143 | &add($H1,$t18,$H1); &FR($t18); | ||
| 144 | &mul($a[3],$b[1],($r22)=&NR(1)); | ||
| 145 | &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); | ||
| 146 | &add($R,$r20,$R); | ||
| 147 | &add($H1,$t19,$H1); &FR($t19); | ||
| 148 | &muh($a[1],$b[3],($r23)=&NR(1)); | ||
| 149 | &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); | ||
| 150 | &add($R,$r21,$R); | ||
| 151 | &add($H1,$t20,$H1); &FR($t20); | ||
| 152 | &muh($a[2],$b[2],($r24)=&NR(1)); | ||
| 153 | &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); | ||
| 154 | &add($R,$r22,$R); | ||
| 155 | &add($H1,$t21,$H1); &FR($t21); | ||
| 156 | &muh($a[3],$b[1],($r25)=&NR(1)); | ||
| 157 | &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); | ||
| 158 | &add($H1,$t22,$H1); &FR($t22); | ||
| 159 | &st($R,&QWPw(4,$rp)); | ||
| 160 | &add($H1,$H2,$R); | ||
| 161 | # column 5: &muh words of a1*b3, a2*b2, a3*b1 plus &mul words of a2*b3, a3*b2 | ||
| 162 | &mov("zero",$H1); | ||
| 163 | &add($R,$r23,$R); | ||
| 164 | &mov("zero",$H2); | ||
| 165 | &mul($a[2],$b[3],($r26)=&NR(1)); | ||
| 166 | &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); | ||
| 167 | &add($R,$r24,$R); | ||
| 168 | &add($H1,$t23,$H1); &FR($t23); | ||
| 169 | &mul($a[3],$b[2],($r27)=&NR(1)); | ||
| 170 | &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); | ||
| 171 | &add($R,$r25,$R); | ||
| 172 | &add($H1,$t24,$H1); &FR($t24); | ||
| 173 | &muh($a[2],$b[3],($r28)=&NR(1)); | ||
| 174 | &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); | ||
| 175 | &add($R,$r26,$R); | ||
| 176 | &add($H1,$t25,$H1); &FR($t25); | ||
| 177 | &muh($a[3],$b[2],($r29)=&NR(1)); | ||
| 178 | &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); | ||
| 179 | &add($R,$r27,$R); | ||
| 180 | &add($H1,$t26,$H1); &FR($t26); | ||
| 181 | &mul($a[3],$b[3],($r30)=&NR(1)); | ||
| 182 | &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); | ||
| 183 | &add($H1,$t27,$H1); &FR($t27); | ||
| 184 | &st($R,&QWPw(5,$rp)); | ||
| 185 | &add($H1,$H2,$R); | ||
| 186 | # column 6: &muh words of a2*b3, a3*b2 plus the &mul word of a3*b3 | ||
| 187 | &mov("zero",$H1); | ||
| 188 | &add($R,$r28,$R); | ||
| 189 | &mov("zero",$H2); | ||
| 190 | &muh($a[3],$b[3],($r31)=&NR(1)); | ||
| 191 | &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); | ||
| 192 | &add($R,$r29,$R); | ||
| 193 | &add($H1,$t28,$H1); &FR($t28); | ||
| 194 | ############ | ||
| 195 | &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); | ||
| 196 | &add($R,$r30,$R); | ||
| 197 | &add($H1,$t29,$H1); &FR($t29); | ||
| 198 | ############ | ||
| 199 | &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); | ||
| 200 | &add($H1,$t30,$H1); &FR($t30); | ||
| 201 | &st($R,&QWPw(6,$rp)); | ||
| 202 | &add($H1,$H2,$R); | ||
| 203 | # column 7: carries of column 6 plus the &muh word of a3*b3 | ||
| 204 | &add($R,$r31,$R); &FR($r31); | ||
| 205 | &st($R,&QWPw(7,$rp)); | ||
| 206 | |||
| 207 | &FR($R,$H1,$H2); | ||
| 208 | &function_end($name); | ||
| 209 | |||
| 210 | &fin_pool; | ||
| 211 | } | ||
| 212 | |||
| 213 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl new file mode 100644 index 0000000000..79d86dd25c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub mul_add_c | ||
| 5 | { | ||
| 6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | # adds the two-word product a*b into the accumulator (c2,c1,c0) and reports a running call count on STDERR | ||
| 9 | print STDERR "count=$cnt\n"; $cnt++; | ||
| 10 | ($l1)=&NR(1); &mul($a,$b,$l1); | ||
| 11 | ($h1)=&NR(1); &muh($a,$b,$h1); | ||
| 12 | &add($c0,$l1,$c0); # fold the &mul word into c0 | ||
| 13 | ($t1)=&NR(1); &cmpult($c0,$l1,$t1); &FR($l1); # t1 flags a wrap of c0 | ||
| 14 | &add($t1,$h1,$h1); &FR($t1); # carry joins the &muh word | ||
| 15 | &add($c1,$h1,$c1); # fold that into c1 | ||
| 16 | ($t2)=&NR(1); &cmpult($c1,$h1,$t2); &FR($h1); # t2 flags a wrap of c1 | ||
| 17 | &add($c2,$t2,$c2); &FR($t2); # and is counted in c2 | ||
| 18 | } | ||
| 19 | |||
| 20 | sub bn_mul_comba4 | ||
| 21 | { | ||
| 22 | local($name)=@_; | ||
| 23 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 24 | # Emits the routine $name: the 8-word product r[0..7] of the 4-word inputs a[] and b[], one result column at a time via &mul_add_c | ||
| 25 | $cnt=1; | ||
| 26 | &init_pool(3); | ||
| 27 | # arguments in order: result pointer, the two source pointers | ||
| 28 | $rp=&wparam(0); | ||
| 29 | $ap=&wparam(1); | ||
| 30 | $bp=&wparam(2); | ||
| 31 | |||
| 32 | &function_begin($name,""); | ||
| 33 | |||
| 34 | &comment(""); | ||
| 35 | # load all eight operand words; ap and bp are released after their last use | ||
| 36 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 37 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 38 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 39 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 42 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
| 43 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); | ||
| 44 | # column 0: the &mul word of a0*b0 is r[0]; its &muh word starts column 1 | ||
| 45 | ($c0,$c1,$c2)=&NR(3); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | &mul($a[0],$b[0],$c0); | ||
| 48 | &muh($a[0],$b[0],$c1); | ||
| 49 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); # NOTE(review): &NR receives the freed register name here, while other calls in this file pass a count -- confirm intended | ||
| 50 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the accumulator names: the old c1 becomes the new low word | ||
| 51 | &mov("zero",$c2); | ||
| 52 | # column 1 -> r[1] | ||
| 53 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
| 54 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
| 55 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 56 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 57 | &mov("zero",$c2); | ||
| 58 | # column 2 -> r[2] | ||
| 59 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
| 60 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
| 61 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
| 62 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 63 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 64 | &mov("zero",$c2); | ||
| 65 | # column 3 -> r[3]; a[0] and b[0] are released after their last use | ||
| 66 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); | ||
| 67 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
| 68 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
| 69 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
| 70 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 71 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 72 | &mov("zero",$c2); | ||
| 73 | # column 4 -> r[4] | ||
| 74 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); | ||
| 75 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
| 76 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
| 77 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 78 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 79 | &mov("zero",$c2); | ||
| 80 | # column 5 -> r[5] | ||
| 81 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); | ||
| 82 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
| 83 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 84 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 85 | &mov("zero",$c2); | ||
| 86 | # column 6 -> r[6]; what remains in c1 is the top word r[7] | ||
| 87 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); | ||
| 88 | &st($c0,&QWPw(6,$rp)); | ||
| 89 | &st($c1,&QWPw(7,$rp)); | ||
| 90 | |||
| 91 | &FR($c0,$c1,$c2); | ||
| 92 | |||
| 93 | &function_end($name); | ||
| 94 | |||
| 95 | &fin_pool; | ||
| 96 | } | ||
| 97 | |||
| 98 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl new file mode 100644 index 0000000000..525ca7494b --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl | |||
| @@ -0,0 +1,177 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_mul_comba8 | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 8 | |||
| 9 | $cnt=1; | ||
| 10 | &init_pool(3); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | |||
| 16 | &function_begin($name,""); | ||
| 17 | |||
| 18 | &comment(""); | ||
| 19 | |||
| 20 | &stack_push(2); | ||
| 21 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 22 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 23 | &st($reg_s0,&swtmp(0)); &FR($reg_s0); | ||
| 24 | &st($reg_s1,&swtmp(1)); &FR($reg_s1); | ||
| 25 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 26 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 27 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 28 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 29 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 30 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
| 31 | &ld(($a[4])=&NR(1),&QWPw(1,$ap)); | ||
| 32 | &ld(($b[4])=&NR(1),&QWPw(1,$bp)); | ||
| 33 | &ld(($a[5])=&NR(1),&QWPw(1,$ap)); | ||
| 34 | &ld(($b[5])=&NR(1),&QWPw(1,$bp)); | ||
| 35 | &ld(($a[6])=&NR(1),&QWPw(1,$ap)); | ||
| 36 | &ld(($b[6])=&NR(1),&QWPw(1,$bp)); | ||
| 37 | &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap); | ||
| 38 | &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp); | ||
| 39 | |||
| 40 | ($c0,$c1,$c2)=&NR(3); | ||
| 41 | &mov("zero",$c2); | ||
| 42 | &mul($a[0],$b[0],$c0); | ||
| 43 | &muh($a[0],$b[0],$c1); | ||
| 44 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | |||
| 48 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
| 49 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
| 50 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 52 | &mov("zero",$c2); | ||
| 53 | |||
| 54 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
| 55 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
| 56 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
| 57 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 59 | &mov("zero",$c2); | ||
| 60 | |||
| 61 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); | ||
| 62 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
| 63 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
| 64 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); | ||
| 65 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 66 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 67 | &mov("zero",$c2); | ||
| 68 | |||
| 69 | &mul_add_c($a[0],$b[4],$c0,$c1,$c2); | ||
| 70 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); | ||
| 71 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
| 72 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); | ||
| 73 | &mul_add_c($a[4],$b[0],$c0,$c1,$c2); | ||
| 74 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 75 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 76 | &mov("zero",$c2); | ||
| 77 | |||
| 78 | &mul_add_c($a[0],$b[5],$c0,$c1,$c2); | ||
| 79 | &mul_add_c($a[1],$b[4],$c0,$c1,$c2); | ||
| 80 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); | ||
| 81 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); | ||
| 82 | &mul_add_c($a[4],$b[1],$c0,$c1,$c2); | ||
| 83 | &mul_add_c($a[5],$b[0],$c0,$c1,$c2); | ||
| 84 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 85 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 86 | &mov("zero",$c2); | ||
| 87 | |||
| 88 | &mul_add_c($a[0],$b[6],$c0,$c1,$c2); | ||
| 89 | &mul_add_c($a[1],$b[5],$c0,$c1,$c2); | ||
| 90 | &mul_add_c($a[2],$b[4],$c0,$c1,$c2); | ||
| 91 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); | ||
| 92 | &mul_add_c($a[4],$b[2],$c0,$c1,$c2); | ||
| 93 | &mul_add_c($a[5],$b[1],$c0,$c1,$c2); | ||
| 94 | &mul_add_c($a[6],$b[0],$c0,$c1,$c2); | ||
| 95 | &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 97 | &mov("zero",$c2); | ||
| 98 | |||
| 99 | &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); | ||
| 100 | &mul_add_c($a[1],$b[6],$c0,$c1,$c2); | ||
| 101 | &mul_add_c($a[2],$b[5],$c0,$c1,$c2); | ||
| 102 | &mul_add_c($a[3],$b[4],$c0,$c1,$c2); | ||
| 103 | &mul_add_c($a[4],$b[3],$c0,$c1,$c2); | ||
| 104 | &mul_add_c($a[5],$b[2],$c0,$c1,$c2); | ||
| 105 | &mul_add_c($a[6],$b[1],$c0,$c1,$c2); | ||
| 106 | &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
| 107 | &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 108 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 109 | &mov("zero",$c2); | ||
| 110 | |||
| 111 | &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); | ||
| 112 | &mul_add_c($a[2],$b[6],$c0,$c1,$c2); | ||
| 113 | &mul_add_c($a[3],$b[5],$c0,$c1,$c2); | ||
| 114 | &mul_add_c($a[4],$b[4],$c0,$c1,$c2); | ||
| 115 | &mul_add_c($a[5],$b[3],$c0,$c1,$c2); | ||
| 116 | &mul_add_c($a[6],$b[2],$c0,$c1,$c2); | ||
| 117 | &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
| 118 | &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 119 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 120 | &mov("zero",$c2); | ||
| 121 | |||
| 122 | &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); | ||
| 123 | &mul_add_c($a[3],$b[6],$c0,$c1,$c2); | ||
| 124 | &mul_add_c($a[4],$b[5],$c0,$c1,$c2); | ||
| 125 | &mul_add_c($a[5],$b[4],$c0,$c1,$c2); | ||
| 126 | &mul_add_c($a[6],$b[3],$c0,$c1,$c2); | ||
| 127 | &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
| 128 | &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 129 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 130 | &mov("zero",$c2); | ||
| 131 | |||
| 132 | &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); | ||
| 133 | &mul_add_c($a[4],$b[6],$c0,$c1,$c2); | ||
| 134 | &mul_add_c($a[5],$b[5],$c0,$c1,$c2); | ||
| 135 | &mul_add_c($a[6],$b[4],$c0,$c1,$c2); | ||
| 136 | &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); | ||
| 137 | &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 138 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 139 | &mov("zero",$c2); | ||
| 140 | |||
| 141 | &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); | ||
| 142 | &mul_add_c($a[5],$b[6],$c0,$c1,$c2); | ||
| 143 | &mul_add_c($a[6],$b[5],$c0,$c1,$c2); | ||
| 144 | &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); | ||
| 145 | &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 146 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 147 | &mov("zero",$c2); | ||
| 148 | |||
| 149 | &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); | ||
| 150 | &mul_add_c($a[6],$b[6],$c0,$c1,$c2); | ||
| 151 | &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); | ||
| 152 | &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 153 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 154 | &mov("zero",$c2); | ||
| 155 | |||
| 156 | &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); | ||
| 157 | &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); | ||
| 158 | &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 159 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 160 | &mov("zero",$c2); | ||
| 161 | |||
| 162 | &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); | ||
| 163 | &st($c0,&QWPw(14,$rp)); | ||
| 164 | &st($c1,&QWPw(15,$rp)); | ||
| 165 | |||
| 166 | &FR($c0,$c1,$c2); | ||
| 167 | |||
| 168 | &ld($reg_s0,&swtmp(0)); | ||
| 169 | &ld($reg_s1,&swtmp(1)); | ||
| 170 | &stack_pop(2); | ||
| 171 | |||
| 172 | &function_end($name); | ||
| 173 | |||
| 174 | &fin_pool; | ||
| 175 | } | ||
| 176 | |||
| 177 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl new file mode 100644 index 0000000000..a55b696906 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl | |||
| @@ -0,0 +1,113 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_sqr_words # Emits routine $name: for each word of ap[] it stores the mul and muh results of a*a (presumably the low and high halves of the square) into two consecutive words of rp[]. | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); # NOTE(review): "$couny" looks like a typo for $count, so $count below is not localised here | ||
| 8 | |||
| 9 | &init_pool(3); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | |||
| 16 | &function_begin($name,""); | ||
| 17 | |||
| 18 | &comment(""); | ||
| 19 | &sub($count,4,$count); | ||
| 20 | &mov("zero",$cc); | ||
| 21 | &br(&label("finish")); # presumably an unconditional branch: "finish" adds the 4 back and enters last_loop, so every word takes the one-word path | ||
| 22 | &blt($count,&label("finish")); # NOTE(review): this branch and the two loads below follow the br above and look unreachable at run time | ||
| 23 | |||
| 24 | ($a0,$r0)=&NR(2); | ||
| 25 | &ld($a0,&QWPw(0,$ap)); | ||
| 26 | &ld($r0,&QWPw(0,$rp)); | ||
| 27 | |||
| 28 | $a=<<'EOF'; # everything up to EOF is only stored in $a as text; this function makes no other use of $a | ||
| 29 | ########################################################## | ||
| 30 | &set_label("loop"); | ||
| 31 | |||
| 32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 38 | |||
| 39 | ($o0,$t0)=&NR(2); | ||
| 40 | &add($a0,$b0,$o0); | ||
| 41 | &cmpult($o0,$b0,$t0); | ||
| 42 | &add($o0,$cc,$o0); | ||
| 43 | &cmpult($o0,$cc,$cc); | ||
| 44 | &add($cc,$t0,$cc); &FR($t0); | ||
| 45 | |||
| 46 | ($t1,$o1)=&NR(2); | ||
| 47 | |||
| 48 | &add($a1,$b1,$o1); &FR($a1); | ||
| 49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 50 | &add($o1,$cc,$o1); | ||
| 51 | &cmpult($o1,$cc,$cc); | ||
| 52 | &add($cc,$t1,$cc); &FR($t1); | ||
| 53 | |||
| 54 | ($t2,$o2)=&NR(2); | ||
| 55 | |||
| 56 | &add($a2,$b2,$o2); &FR($a2); | ||
| 57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 58 | &add($o2,$cc,$o2); | ||
| 59 | &cmpult($o2,$cc,$cc); | ||
| 60 | &add($cc,$t2,$cc); &FR($t2); | ||
| 61 | |||
| 62 | ($t3,$o3)=&NR(2); | ||
| 63 | |||
| 64 | &add($a3,$b3,$o3); &FR($a3); | ||
| 65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 66 | &add($o3,$cc,$o3); | ||
| 67 | &cmpult($o3,$cc,$cc); | ||
| 68 | &add($cc,$t3,$cc); &FR($t3); | ||
| 69 | |||
| 70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 71 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 72 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 73 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 74 | |||
| 75 | &sub($count,4,$count); # count-=4 | ||
| 76 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 77 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 78 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 79 | |||
| 80 | &blt($count,&label("finish")); | ||
| 81 | &ld($a0,&QWPw(0,$ap)); | ||
| 82 | &ld($b0,&QWPw(0,$bp)); | ||
| 83 | &br(&label("loop")); | ||
| 84 | EOF | ||
| 85 | ################################################## | ||
| 86 | # One word per pass; with the unrolled loop parked in $a above, all words are handled here | ||
| 87 | |||
| 88 | &set_label("last_loop"); | ||
| 89 | |||
| 90 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 91 | &mul($a0,$a0,($l0)=&NR(1)); # first product word of a*a (presumably the low half) | ||
| 92 | &add($ap,$QWS,$ap); # step ap by one word | ||
| 93 | &add($rp,2*$QWS,$rp); # step rp by two words; the stores below reach back with offsets -2 and -1 | ||
| 94 | &sub($count,1,$count); | ||
| 95 | &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); # second product word (presumably the high half); a is no longer needed | ||
| 96 | &st($l0,&QWPw(-2,$rp)); &FR($l0); | ||
| 97 | &st($h0,&QWPw(-1,$rp)); &FR($h0); | ||
| 98 | |||
| 99 | &bgt($count,&label("last_loop")); | ||
| 100 | &function_end_A($name); # NOTE(review): presumably the exit taken when last_loop falls through - confirm against function_end_A | ||
| 101 | |||
| 102 | ###################################################### | ||
| 103 | &set_label("finish"); | ||
| 104 | &add($count,4,$count); # undo the count-=4 done at entry | ||
| 105 | &bgt($count,&label("last_loop")); | ||
| 106 | |||
| 107 | &set_label("end"); | ||
| 108 | &function_end($name); | ||
| 109 | |||
| 110 | &fin_pool; | ||
| 111 | } | ||
| 112 | |||
| 113 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl new file mode 100644 index 0000000000..bf33f5b503 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub sqr_add_c # Emits code for ($c2,$c1,$c0) += $a*$a, where $c0 is the lowest accumulator word. All arguments are register names. | ||
| 5 | { | ||
| 6 | local($a,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | # The carry out of $c0 is folded into the high product word before that word is added to $c1, so one compare catches every carry into $c2. | ||
| 9 | &mul($a,$a,($l1)=&NR(1)); | ||
| 10 | &muh($a,$a,($h1)=&NR(1)); | ||
| 11 | &add($c0,$l1,$c0); | ||
| 12 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = carry out of c0 | ||
| 13 | &add($h1,$t1,$h1); &FR($t1); # cannot wrap: the high word of a 64x64 product is at most 2^64-2 | ||
| 14 | &add($c1,$h1,$c1); | ||
| 15 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = carry out of c1 | ||
| 16 | &add($c2,$t2,$c2); &FR($t2); | ||
| 17 | } | ||
| 18 | |||
| 19 | sub sqr_add_c2 # Emits code for ($c2,$c1,$c0) += 2*$a*$b, where $c0 is the lowest accumulator word. All arguments are register names. | ||
| 20 | { | ||
| 21 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 22 | local($l1,$h1,$lc1,$hc1); | ||
| 23 | |||
| 24 | &mul($a,$b,($l1)=&NR(1)); | ||
| 25 | &muh($a,$b,($h1)=&NR(1)); | ||
| 26 | &cmplt($l1,"zero",($lc1)=&NR(1)); # lc1 = top bit of the low word, shifted out by the doubling below | ||
| 27 | &cmplt($h1,"zero",($hc1)=&NR(1)); # hc1 = top bit of the high word, which belongs in c2 | ||
| 28 | &add($l1,$l1,$l1); | ||
| 29 | &add($h1,$h1,$h1); | ||
| 30 | &add($h1,$lc1,$h1); &FR($lc1); # cannot wrap: the doubled high word is even | ||
| 31 | &add($c2,$hc1,$c2); &FR($hc1); | ||
| 32 | |||
| 33 | &add($c0,$l1,$c0); | ||
| 34 | &add($c1,$h1,$c1); | ||
| 35 | &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); # lc1 = carry out of c0 | ||
| 36 | &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); # hc1 = carry out of c1 += h1 | ||
| 37 | # Adding the c0 carry can itself wrap c1 (the doubled high word may be all ones), so that wrap is detected and carried into c2 too. | ||
| 38 | &add($c1,$lc1,$c1); &cmpult($c1,$lc1,$lc1); # lc1 becomes 1 only if c1 wrapped to zero | ||
| 39 | &add($c2,$hc1,$c2); &FR($hc1); &add($c2,$lc1,$c2); &FR($lc1); | ||
| 40 | } | ||
| 41 | |||
| 42 | |||
| 43 | sub bn_sqr_comba4 # Emits routine $name: loads the 4 words at ap and stores 8 result words at rp, building the square one column at a time in the ($c0,$c1,$c2) accumulator. | ||
| 44 | { | ||
| 45 | local($name)=@_; | ||
| 46 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 47 | |||
| 48 | $cnt=1; | ||
| 49 | &init_pool(2); | ||
| 50 | |||
| 51 | $rp=&wparam(0); | ||
| 52 | $ap=&wparam(1); | ||
| 53 | |||
| 54 | &function_begin($name,""); | ||
| 55 | |||
| 56 | &comment(""); | ||
| 57 | |||
| 58 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 59 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 60 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 61 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
| 62 | |||
| 63 | ($c0,$c1,$c2)=&NR(3); | ||
| 64 | |||
| 65 | &mov("zero",$c2); | ||
| 66 | &mul($a[0],$a[0],$c0); # column 0: a0*a0 written straight into c0/c1 | ||
| 67 | &muh($a[0],$a[0],$c1); | ||
| 68 | &st($c0,&QWPw(0,$rp)); | ||
| 69 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rename only: old c1/c2 become the low/middle words, the register just stored becomes the new top word | ||
| 70 | &mov("zero",$c2); # clear the new top word | ||
| 71 | |||
| 72 | &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); # column 1: a0*a1, doubled by sqr_add_c2 | ||
| 73 | &st($c0,&QWPw(1,$rp)); | ||
| 74 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 75 | &mov("zero",$c2); | ||
| 76 | |||
| 77 | &sqr_add_c($a[1],$c0,$c1,$c2); # column 2: a1*a1 plus doubled a2*a0 | ||
| 78 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
| 79 | &st($c0,&QWPw(2,$rp)); | ||
| 80 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 81 | &mov("zero",$c2); | ||
| 82 | |||
| 83 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); # column 3: doubled a3*a0 and a2*a1 | ||
| 84 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
| 85 | &st($c0,&QWPw(3,$rp)); | ||
| 86 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 87 | &mov("zero",$c2); | ||
| 88 | |||
| 89 | &sqr_add_c($a[2],$c0,$c1,$c2); # column 4: a2*a2 plus doubled a3*a1 | ||
| 90 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
| 91 | &st($c0,&QWPw(4,$rp)); | ||
| 92 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 93 | &mov("zero",$c2); | ||
| 94 | |||
| 95 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); # column 5: doubled a3*a2 | ||
| 96 | &st($c0,&QWPw(5,$rp)); | ||
| 97 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 98 | &mov("zero",$c2); | ||
| 99 | |||
| 100 | &sqr_add_c($a[3],$c0,$c1,$c2); # column 6: a3*a3; the middle word left over is result word 7 | ||
| 101 | &st($c0,&QWPw(6,$rp)); | ||
| 102 | &st($c1,&QWPw(7,$rp)); | ||
| 103 | |||
| 104 | &function_end($name); | ||
| 105 | |||
| 106 | &fin_pool; | ||
| 107 | } | ||
| 108 | |||
| 109 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl new file mode 100644 index 0000000000..b4afe085f1 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl | |||
| @@ -0,0 +1,132 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_sqr_comba8 # Emits routine $name: loads the 8 words at ap and stores 16 result words at rp, building the square one column at a time in the ($c0,$c1,$c2) accumulator. | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 8 | |||
| 9 | $cnt=1; | ||
| 10 | &init_pool(2); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | |||
| 15 | &function_begin($name,""); | ||
| 16 | |||
| 17 | &comment(""); | ||
| 18 | |||
| 19 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 20 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 21 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 22 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 23 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); | ||
| 24 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
| 25 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
| 26 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
| 27 | |||
| 28 | ($c0,$c1,$c2)=&NR(3); | ||
| 29 | |||
| 30 | &mov("zero",$c2); | ||
| 31 | &mul($a[0],$a[0],$c0); # column 0: a0*a0 written straight into c0/c1 | ||
| 32 | &muh($a[0],$a[0],$c1); | ||
| 33 | &st($c0,&QWPw(0,$rp)); | ||
| 34 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rename only: old c1/c2 become the low/middle words, the register just stored becomes the new top word | ||
| 35 | &mov("zero",$c2); # clear the new top word | ||
| 36 | |||
| 37 | &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); # column 1: cross term a1*a0 (sqr_add_c2 adds twice the product) | ||
| 38 | &st($c0,&QWPw(1,$rp)); | ||
| 39 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 40 | &mov("zero",$c2); | ||
| 41 | |||
| 42 | &sqr_add_c($a[1],$c0,$c1,$c2); # column 2: every pair of indices summing to 2 | ||
| 43 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
| 44 | &st($c0,&QWPw(2,$rp)); | ||
| 45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | |||
| 48 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); # column 3 | ||
| 49 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
| 50 | &st($c0,&QWPw(3,$rp)); | ||
| 51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 52 | &mov("zero",$c2); | ||
| 53 | |||
| 54 | &sqr_add_c($a[2],$c0,$c1,$c2); # column 4 | ||
| 55 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
| 56 | &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); | ||
| 57 | &st($c0,&QWPw(4,$rp)); | ||
| 58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 59 | &mov("zero",$c2); | ||
| 60 | |||
| 61 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); # column 5 | ||
| 62 | &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); | ||
| 63 | &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); | ||
| 64 | &st($c0,&QWPw(5,$rp)); | ||
| 65 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 66 | &mov("zero",$c2); | ||
| 67 | |||
| 68 | &sqr_add_c($a[3],$c0,$c1,$c2); # column 6 | ||
| 69 | &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); | ||
| 70 | &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); | ||
| 71 | &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); | ||
| 72 | &st($c0,&QWPw(6,$rp)); | ||
| 73 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 74 | &mov("zero",$c2); | ||
| 75 | |||
| 76 | &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); # column 7 | ||
| 77 | &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); | ||
| 78 | &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); | ||
| 79 | &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); | ||
| 80 | &st($c0,&QWPw(7,$rp)); | ||
| 81 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 82 | &mov("zero",$c2); | ||
| 83 | |||
| 84 | &sqr_add_c($a[4],$c0,$c1,$c2); # column 8 | ||
| 85 | &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); | ||
| 86 | &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); | ||
| 87 | &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); | ||
| 88 | &st($c0,&QWPw(8,$rp)); | ||
| 89 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 90 | &mov("zero",$c2); | ||
| 91 | |||
| 92 | &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); # column 9 | ||
| 93 | &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); | ||
| 94 | &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); | ||
| 95 | &st($c0,&QWPw(9,$rp)); | ||
| 96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 97 | &mov("zero",$c2); | ||
| 98 | |||
| 99 | &sqr_add_c($a[5],$c0,$c1,$c2); # column 10 | ||
| 100 | &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); | ||
| 101 | &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); | ||
| 102 | &st($c0,&QWPw(10,$rp)); | ||
| 103 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 104 | &mov("zero",$c2); | ||
| 105 | |||
| 106 | &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); # column 11 | ||
| 107 | &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); | ||
| 108 | &st($c0,&QWPw(11,$rp)); | ||
| 109 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 110 | &mov("zero",$c2); | ||
| 111 | |||
| 112 | &sqr_add_c($a[6],$c0,$c1,$c2); # column 12 | ||
| 113 | &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); | ||
| 114 | &st($c0,&QWPw(12,$rp)); | ||
| 115 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 116 | &mov("zero",$c2); | ||
| 117 | |||
| 118 | &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); # column 13 | ||
| 119 | &st($c0,&QWPw(13,$rp)); | ||
| 120 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 121 | &mov("zero",$c2); | ||
| 122 | |||
| 123 | &sqr_add_c($a[7],$c0,$c1,$c2); # column 14: a7*a7; the middle word left over is result word 15 | ||
| 124 | &st($c0,&QWPw(14,$rp)); | ||
| 125 | &st($c1,&QWPw(15,$rp)); | ||
| 126 | |||
| 127 | &function_end($name); | ||
| 128 | |||
| 129 | &fin_pool; | ||
| 130 | } | ||
| 131 | |||
| 132 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sub.pl b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl new file mode 100644 index 0000000000..d998da5c21 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_sub_words # Emits routine $name: rp[i] = ap[i] - bp[i] - borrow over $count words, four words per unrolled pass and then one word at a time; the running borrow lives in $cc. | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | $count=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | &blt($count,&label("finish")); | ||
| 23 | |||
| 24 | ($a0,$b0)=&NR(2); | ||
| 25 | &ld($a0,&QWPw(0,$ap)); | ||
| 26 | &ld($b0,&QWPw(0,$bp)); | ||
| 27 | |||
| 28 | ########################################################## | ||
| 29 | &set_label("loop"); | ||
| 30 | |||
| 31 | ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); | ||
| 32 | &ld($a1,&QWPw(1,$ap)); | ||
| 33 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
| 34 | &ld($b1,&QWPw(1,$bp)); | ||
| 35 | &sub($a0,$b0,$a0); # do the subtract | ||
| 36 | &ld($a2,&QWPw(2,$ap)); | ||
| 37 | &cmpult($a0,$cc,$b0); # will taking off the incoming borrow underflow? | ||
| 38 | &ld($b2,&QWPw(2,$bp)); | ||
| 39 | &sub($a0,$cc,$o0); # take off the incoming borrow | ||
| 40 | &ld($a3,&QWPw(3,$ap)); | ||
| 41 | &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); # borrow out = sum of the two underflow flags | ||
| 42 | |||
| 43 | &cmpult($a1,$b1,$t1); # will we borrow? | ||
| 44 | &sub($a1,$b1,$a1); # do the subtract | ||
| 45 | &ld($b3,&QWPw(3,$bp)); | ||
| 46 | &cmpult($a1,$cc,$b1); # will taking off the incoming borrow underflow? | ||
| 47 | &sub($a1,$cc,$o1); # take off the incoming borrow | ||
| 48 | &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); | ||
| 49 | |||
| 50 | &cmpult($a2,$b2,$tmp); # will we borrow? | ||
| 51 | &sub($a2,$b2,$a2); # do the subtract | ||
| 52 | &st($o0,&QWPw(0,$rp)); &FR($o0); # save | ||
| 53 | &cmpult($a2,$cc,$b2); # will taking off the incoming borrow underflow? | ||
| 54 | &sub($a2,$cc,$o2); # take off the incoming borrow | ||
| 55 | &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); | ||
| 56 | |||
| 57 | &cmpult($a3,$b3,$t3); # will we borrow? | ||
| 58 | &sub($a3,$b3,$a3); # do the subtract | ||
| 59 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
| 60 | &cmpult($a3,$cc,$b3); # will taking off the incoming borrow underflow? | ||
| 61 | &sub($a3,$cc,$o3); # take off the incoming borrow | ||
| 62 | &add($b3,$t3,$cc); &FR($t3,$a3,$b3); | ||
| 63 | |||
| 64 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
| 65 | &sub($count,4,$count); # count-=4 | ||
| 66 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
| 67 | &add($ap,4*$QWS,$ap); # ap+=4 words | ||
| 68 | &add($bp,4*$QWS,$bp); # bp+=4 words | ||
| 69 | &add($rp,4*$QWS,$rp); # rp+=4 words | ||
| 70 | |||
| 71 | &blt($count,&label("finish")); | ||
| 72 | &ld($a0,&QWPw(0,$ap)); # preload word 0 of the next pass into the same $a0/$b0 the loop top expects | ||
| 73 | &ld($b0,&QWPw(0,$bp)); | ||
| 74 | &br(&label("loop")); | ||
| 75 | ################################################## | ||
| 76 | # Do the last 0..3 words | ||
| 77 | |||
| 78 | &set_label("last_loop"); | ||
| 79 | |||
| 80 | &ld($a0,&QWPw(0,$ap)); # get a | ||
| 81 | &ld($b0,&QWPw(0,$bp)); # get b | ||
| 82 | &cmpult($a0,$b0,$tmp); # will we borrow? ($tmp is the name last assigned in the unrolled loop above) | ||
| 83 | &sub($a0,$b0,$a0); # do the subtract | ||
| 84 | &cmpult($a0,$cc,$b0); # will taking off the incoming borrow underflow? | ||
| 85 | &sub($a0,$cc,$a0); # take off the incoming borrow | ||
| 86 | &st($a0,&QWPw(0,$rp)); # save | ||
| 87 | &add($b0,$tmp,$cc); # add the borrows | ||
| 88 | |||
| 89 | &add($ap,$QWS,$ap); | ||
| 90 | &add($bp,$QWS,$bp); | ||
| 91 | &add($rp,$QWS,$rp); | ||
| 92 | &sub($count,1,$count); | ||
| 93 | &bgt($count,&label("last_loop")); | ||
| 94 | &function_end_A($name); | ||
| 95 | |||
| 96 | ###################################################### | ||
| 97 | &set_label("finish"); | ||
| 98 | &add($count,4,$count); # undo the count-=4 so count is the number of words still to do | ||
| 99 | &bgt($count,&label("last_loop")); | ||
| 100 | |||
| 101 | &FR($a0,$b0); | ||
| 102 | &set_label("end"); | ||
| 103 | &function_end($name); | ||
| 104 | |||
| 105 | &fin_pool; | ||
| 106 | } | ||
| 107 | |||
| 108 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/add.pl b/src/lib/libcrypto/bn/asm/alpha/add.pl new file mode 100644 index 0000000000..13bf516428 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/add.pl | |||
| @@ -0,0 +1,118 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_add_words # Emits routine $name: rp[i] = ap[i] + bp[i] + carry over $count words, four words per unrolled pass and then one word at a time; the running carry lives in $cc. | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | $count=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | &blt($count,&label("finish")); | ||
| 23 | |||
| 24 | ($a0,$b0)=&NR(2); # NOTE(review): these two names are reassigned by the loads at the top of the loop, so this pair looks unused | ||
| 25 | |||
| 26 | ########################################################## | ||
| 27 | &set_label("loop"); | ||
| 28 | |||
| 29 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); | ||
| 30 | &ld(($b0)=&NR(1),&QWPw(0,$bp)); | ||
| 31 | &ld(($a1)=&NR(1),&QWPw(1,$ap)); | ||
| 32 | &ld(($b1)=&NR(1),&QWPw(1,$bp)); | ||
| 33 | |||
| 34 | ($o0,$t0)=&NR(2); | ||
| 35 | &add($a0,$b0,$o0); | ||
| 36 | &ld(($a2)=&NR(1),&QWPw(2,$ap)); | ||
| 37 | &cmpult($o0,$b0,$t0); # t0 = carry out of a+b | ||
| 38 | &add($o0,$cc,$o0); | ||
| 39 | &cmpult($o0,$cc,$cc); # cc = carry out of adding the incoming carry | ||
| 40 | &ld(($b2)=&NR(1),&QWPw(2,$bp)); | ||
| 41 | &add($cc,$t0,$cc); &FR($t0); # carry into the next word | ||
| 42 | |||
| 43 | ($t1,$o1)=&NR(2); | ||
| 44 | |||
| 45 | &add($a1,$b1,$o1); &FR($a1); | ||
| 46 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 47 | &add($o1,$cc,$o1); | ||
| 48 | &cmpult($o1,$cc,$cc); | ||
| 49 | &ld(($a3)=&NR(1),&QWPw(3,$ap)); | ||
| 50 | &add($cc,$t1,$cc); &FR($t1); | ||
| 51 | |||
| 52 | ($t2,$o2)=&NR(2); | ||
| 53 | |||
| 54 | &add($a2,$b2,$o2); &FR($a2); | ||
| 55 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 56 | &add($o2,$cc,$o2); | ||
| 57 | &cmpult($o2,$cc,$cc); | ||
| 58 | &ld(($b3)=&NR(1),&QWPw(3,$bp)); | ||
| 59 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 60 | &add($cc,$t2,$cc); &FR($t2); | ||
| 61 | |||
| 62 | ($t3,$o3)=&NR(2); | ||
| 63 | |||
| 64 | &st($o1,&QWPw(1,$rp)); &FR($o1); # each sum goes to its own word of rp (previously all four were stored at offset 0) | ||
| 65 | &add($a3,$b3,$o3); &FR($a3); | ||
| 66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 67 | &add($o3,$cc,$o3); | ||
| 68 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
| 69 | &cmpult($o3,$cc,$cc); | ||
| 70 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
| 71 | &add($cc,$t3,$cc); &FR($t3); | ||
| 72 | |||
| 73 | |||
| 74 | &sub($count,4,$count); # count-=4 | ||
| 75 | &add($ap,4*$QWS,$ap); # ap+=4 words | ||
| 76 | &add($bp,4*$QWS,$bp); # bp+=4 words | ||
| 77 | &add($rp,4*$QWS,$rp); # rp+=4 words | ||
| 78 | |||
| 79 | ### | ||
| 80 | &bge($count,&label("loop")); | ||
| 81 | ### | ||
| 82 | &br(&label("finish")); | ||
| 83 | ################################################## | ||
| 84 | # Do the last 0..3 words | ||
| 85 | |||
| 86 | ($t0,$o0)=&NR(2); | ||
| 87 | &set_label("last_loop"); | ||
| 88 | |||
| 89 | &ld($a0,&QWPw(0,$ap)); # get a | ||
| 90 | &ld($b0,&QWPw(0,$bp)); # get b | ||
| 91 | &add($ap,$QWS,$ap); | ||
| 92 | &add($bp,$QWS,$bp); | ||
| 93 | &add($a0,$b0,$o0); | ||
| 94 | &sub($count,1,$count); | ||
| 95 | &cmpult($o0,$b0,$t0); # carry out of a+b | ||
| 96 | &add($o0,$cc,$o0); # add the incoming carry | ||
| 97 | &cmpult($o0,$cc,$cc); # carry out of that add | ||
| 98 | &add($rp,$QWS,$rp); | ||
| 99 | &st($o0,&QWPw(-1,$rp)); # save (rp has already been stepped, hence offset -1) | ||
| 100 | &add($cc,$t0,$cc); # add the carries | ||
| 101 | |||
| 102 | ### | ||
| 103 | &bgt($count,&label("last_loop")); | ||
| 104 | &function_end_A($name); | ||
| 105 | |||
| 106 | ###################################################### | ||
| 107 | &set_label("finish"); | ||
| 108 | &add($count,4,$count); # undo the count-=4 so count is the number of words still to do | ||
| 109 | &bgt($count,&label("last_loop")); | ||
| 110 | |||
| 111 | &FR($o0,$t0,$a0,$b0); | ||
| 112 | &set_label("end"); | ||
| 113 | &function_end($name); | ||
| 114 | |||
| 115 | &fin_pool; | ||
| 116 | } | ||
| 117 | |||
| 118 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/div.pl b/src/lib/libcrypto/bn/asm/alpha/div.pl new file mode 100644 index 0000000000..e9e680897a --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/div.pl | |||
| @@ -0,0 +1,144 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | |||
| 3 | sub bn_div_words # Hands a fixed assembly listing for bn_div_words to asm_add; the listing's own header says it was taken from C compiler output with the labels redone. | ||
| 4 | { | ||
| 5 | local($data)=<<'EOF'; | ||
| 6 | # | ||
| 7 | # What follows was taken directly from the C compiler with a few | ||
| 8 | # hacks to redo the lables. | ||
| 9 | # | ||
| 10 | .text | ||
| 11 | .set noreorder | ||
| 12 | .set volatile | ||
| 13 | .align 3 | ||
| 14 | .globl bn_div_words | ||
| 15 | .ent bn_div_words | ||
| 16 | bn_div_words: | ||
| 17 | ldgp $29,0($27) | ||
| 18 | bn_div_words.ng: | ||
| 19 | lda $30,-48($30) | ||
| 20 | .frame $30,48,$26,0 | ||
| 21 | stq $26,0($30) | ||
| 22 | stq $9,8($30) | ||
| 23 | stq $10,16($30) | ||
| 24 | stq $11,24($30) | ||
| 25 | stq $12,32($30) | ||
| 26 | stq $13,40($30) | ||
| 27 | .mask 0x4003e00,-48 | ||
| 28 | .prologue 1 | ||
| 29 | bis $16,$16,$9 | ||
| 30 | bis $17,$17,$10 | ||
| 31 | bis $18,$18,$11 | ||
| 32 | bis $31,$31,$13 | ||
| 33 | bis $31,2,$12 | ||
| 34 | bne $11,$9119 | ||
| 35 | lda $0,-1 | ||
| 36 | br $31,$9136 | ||
| 37 | .align 4 | ||
| 38 | $9119: | ||
| 39 | bis $11,$11,$16 | ||
| 40 | jsr $26,BN_num_bits_word | ||
| 41 | ldgp $29,0($26) | ||
| 42 | subq $0,64,$1 | ||
| 43 | beq $1,$9120 | ||
| 44 | bis $31,1,$1 | ||
| 45 | sll $1,$0,$1 | ||
| 46 | cmpule $9,$1,$1 | ||
| 47 | bne $1,$9120 | ||
| 48 | # lda $16,_IO_stderr_ | ||
| 49 | # lda $17,$C32 | ||
| 50 | # bis $0,$0,$18 | ||
| 51 | # jsr $26,fprintf | ||
| 52 | # ldgp $29,0($26) | ||
| 53 | jsr $26,abort | ||
| 54 | ldgp $29,0($26) | ||
| 55 | .align 4 | ||
| 56 | $9120: | ||
| 57 | bis $31,64,$3 | ||
| 58 | cmpult $9,$11,$2 | ||
| 59 | subq $3,$0,$1 | ||
| 60 | addl $1,$31,$0 | ||
| 61 | subq $9,$11,$1 | ||
| 62 | cmoveq $2,$1,$9 | ||
| 63 | beq $0,$9122 | ||
| 64 | zapnot $0,15,$2 | ||
| 65 | subq $3,$0,$1 | ||
| 66 | sll $11,$2,$11 | ||
| 67 | sll $9,$2,$3 | ||
| 68 | srl $10,$1,$1 | ||
| 69 | sll $10,$2,$10 | ||
| 70 | bis $3,$1,$9 | ||
| 71 | $9122: | ||
| 72 | srl $11,32,$5 | ||
| 73 | zapnot $11,15,$6 | ||
| 74 | lda $7,-1 | ||
| 75 | .align 5 | ||
| 76 | $9123: | ||
| 77 | srl $9,32,$1 | ||
| 78 | subq $1,$5,$1 | ||
| 79 | bne $1,$9126 | ||
| 80 | zapnot $7,15,$27 | ||
| 81 | br $31,$9127 | ||
| 82 | .align 4 | ||
| 83 | $9126: | ||
| 84 | bis $9,$9,$24 | ||
| 85 | bis $5,$5,$25 | ||
| 86 | divqu $24,$25,$27 | ||
| 87 | $9127: | ||
| 88 | srl $10,32,$4 | ||
| 89 | .align 5 | ||
| 90 | $9128: | ||
| 91 | mulq $27,$5,$1 | ||
| 92 | subq $9,$1,$3 | ||
| 93 | zapnot $3,240,$1 | ||
| 94 | bne $1,$9129 | ||
| 95 | mulq $6,$27,$2 | ||
| 96 | sll $3,32,$1 | ||
| 97 | addq $1,$4,$1 | ||
| 98 | cmpule $2,$1,$2 | ||
| 99 | bne $2,$9129 | ||
| 100 | subq $27,1,$27 | ||
| 101 | br $31,$9128 | ||
| 102 | .align 4 | ||
| 103 | $9129: | ||
| 104 | mulq $27,$6,$1 | ||
| 105 | mulq $27,$5,$4 | ||
| 106 | srl $1,32,$3 | ||
| 107 | sll $1,32,$1 | ||
| 108 | addq $4,$3,$4 | ||
| 109 | cmpult $10,$1,$2 | ||
| 110 | subq $10,$1,$10 | ||
| 111 | addq $2,$4,$2 | ||
| 112 | cmpult $9,$2,$1 | ||
| 113 | bis $2,$2,$4 | ||
| 114 | beq $1,$9134 | ||
| 115 | addq $9,$11,$9 | ||
| 116 | subq $27,1,$27 | ||
| 117 | $9134: | ||
| 118 | subl $12,1,$12 | ||
| 119 | subq $9,$4,$9 | ||
| 120 | beq $12,$9124 | ||
| 121 | sll $27,32,$13 | ||
| 122 | sll $9,32,$2 | ||
| 123 | srl $10,32,$1 | ||
| 124 | sll $10,32,$10 | ||
| 125 | bis $2,$1,$9 | ||
| 126 | br $31,$9123 | ||
| 127 | .align 4 | ||
| 128 | $9124: | ||
| 129 | bis $13,$27,$0 | ||
| 130 | $9136: | ||
| 131 | ldq $26,0($30) | ||
| 132 | ldq $9,8($30) | ||
| 133 | ldq $10,16($30) | ||
| 134 | ldq $11,24($30) | ||
| 135 | ldq $12,32($30) | ||
| 136 | ldq $13,40($30) | ||
| 137 | addq $30,48,$30 | ||
| 138 | ret $31,($26),1 | ||
| 139 | .end bn_div_words | ||
| 140 | EOF | ||
| 141 | &asm_add($data); # the entry label in the listing now carries its colon like every other label there | ||
| 142 | } | ||
| 143 | |||
| 144 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul.pl b/src/lib/libcrypto/bn/asm/alpha/mul.pl new file mode 100644 index 0000000000..76c926566c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul.pl | |||
| @@ -0,0 +1,104 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_mul_words # Emits routine $name: rp[i] = low word of (ap[i]*word + carry) over $count words, with the carry word kept in $cc; one unrolled pass of four words, then one word at a time. | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); | ||
| 8 | |||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | $word=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | ### | ||
| 23 | &blt($count,&label("finish")); | ||
| 24 | |||
| 25 | ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); | ||
| 26 | |||
| 27 | &set_label("loop"); | ||
| 28 | |||
| 29 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 30 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 31 | |||
| 32 | &muh($a0,$word,($h0)=&NR(1)); # $a0 is still needed by the mul below, so it is released only there | ||
| 33 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 34 | ### wait 8 | ||
| 35 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
| 36 | ### wait 8 | ||
| 37 | &muh($a1,$word,($h1)=&NR(1)); | ||
| 38 | &add($l0,$cc,$l0); ### wait 8 | ||
| 39 | &mul($a1,$word,($l1)=&NR(1)); &FR($a1); | ||
| 40 | &cmpult($l0,$cc,$cc); ### wait 8 | ||
| 41 | &muh($a2,$word,($h2)=&NR(1)); | ||
| 42 | &add($h0,$cc,$cc); &FR($h0); ### wait 8 | ||
| 43 | &mul($a2,$word,($l2)=&NR(1)); &FR($a2); | ||
| 44 | &add($l1,$cc,$l1); ### wait 8 | ||
| 45 | &st($l0,&QWPw(0,$rp)); &FR($l0); | ||
| 46 | &cmpult($l1,$cc,$cc); ### wait 8 | ||
| 47 | &muh($a3,$word,($h3)=&NR(1)); | ||
| 48 | &add($h1,$cc,$cc); &FR($h1); | ||
| 49 | &mul($a3,$word,($l3)=&NR(1)); &FR($a3); | ||
| 50 | &add($l2,$cc,$l2); | ||
| 51 | &st($l1,&QWPw(1,$rp)); &FR($l1); | ||
| 52 | &cmpult($l2,$cc,$cc); | ||
| 53 | &add($h2,$cc,$cc); &FR($h2); | ||
| 54 | &sub($count,4,$count); # count-=4 | ||
| 55 | &st($l2,&QWPw(2,$rp)); &FR($l2); | ||
| 56 | &add($l3,$cc,$l3); | ||
| 57 | &cmpult($l3,$cc,$cc); | ||
| 58 | # no bp step here: this routine takes only rp, ap, count and word | ||
| 59 | &add($h3,$cc,$cc); &FR($h3); | ||
| 60 | &add($ap,4*$QWS,$ap); # ap+=4 words | ||
| 61 | &st($l3,&QWPw(3,$rp)); &FR($l3); | ||
| 62 | &add($rp,4*$QWS,$rp); # rp+=4 words | ||
| 63 | ### | ||
| 64 | &blt($count,&label("finish")); | ||
| 65 | ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); | ||
| 66 | &br(&label("finish")); # NOTE(review): goes to "finish", not "loop", so words after the first four are done one at a time and the load above looks unused - confirm intent | ||
| 67 | ################################################## | ||
| 68 | |||
| 69 | ################################################## | ||
| 70 | # Do the last 0..3 words | ||
| 71 | |||
| 72 | &set_label("last_loop"); | ||
| 73 | |||
| 74 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 75 | ### | ||
| 76 | ### | ||
| 77 | ### | ||
| 78 | &muh($a0,$word,($h0)=&NR(1)); | ||
| 79 | ### Wait 8 for next mul issue | ||
| 80 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
| 81 | &add($ap,$QWS,$ap); | ||
| 82 | ### Loose 12 until result is available | ||
| 83 | &add($rp,$QWS,$rp); | ||
| 84 | &sub($count,1,$count); | ||
| 85 | &add($l0,$cc,$l0); | ||
| 86 | ### | ||
| 87 | &st($l0,&QWPw(-1,$rp)); &FR($l0); | ||
| 88 | &cmpult($l0,$cc,$cc); | ||
| 89 | &add($h0,$cc,$cc); &FR($h0); | ||
| 90 | &bgt($count,&label("last_loop")); | ||
| 91 | &function_end_A($name); | ||
| 92 | |||
| 93 | ###################################################### | ||
| 94 | &set_label("finish"); | ||
| 95 | &add($count,4,$count); # undo the count-=4 so count is the number of words still to do | ||
| 96 | &bgt($count,&label("last_loop")); | ||
| 97 | |||
| 98 | &set_label("end"); | ||
| 99 | &function_end($name); | ||
| 100 | |||
| 101 | &fin_pool; | ||
| 102 | } | ||
| 103 | |||
| 104 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl new file mode 100644 index 0000000000..0d6df69bc4 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl | |||
| @@ -0,0 +1,123 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | # bn_mul_add_words: emits rp[i] += ap[i]*word for count words, chaining the carry word through $cc. | ||
| 4 | sub bn_mul_add_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); | ||
| 8 | # $name is passed to function_begin/function_end; operands come from wparam slots 0..3 (rp, ap, count, word). | ||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | $word=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | &br(&label("finish")); # the 4-way unrolled loop below is only a heredoc string, so always use the one-word loop | ||
| 23 | &blt($count,&label("finish")); | ||
| 24 | |||
| 25 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); | ||
| 26 | |||
| 27 | $a=<<'EOF'; | ||
| 28 | ########################################################## | ||
| 29 | &set_label("loop"); | ||
| 30 | |||
| 31 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); | ||
| 32 | &ld(($a1)=&NR(1),&QWPw(1,$ap)); | ||
| 33 | &muh($a0,$word,($h0)=&NR(1)); | ||
| 34 | &ld(($r1)=&NR(1),&QWPw(1,$rp)); | ||
| 35 | &ld(($a2)=&NR(1),&QWPw(2,$ap)); | ||
| 36 | ### | ||
| 37 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
| 38 | &ld(($r2)=&NR(1),&QWPw(2,$rp)); | ||
| 39 | &muh($a1,$word,($h1)=&NR(1)); | ||
| 40 | &ld(($a3)=&NR(1),&QWPw(3,$ap)); | ||
| 41 | &mul($a1,$word,($l1)=&NR(1)); &FR($a1); | ||
| 42 | &ld(($r3)=&NR(1),&QWPw(3,$rp)); | ||
| 43 | &add($r0,$l0,$r0); | ||
| 44 | &add($r1,$l1,$r1); | ||
| 45 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); | ||
| 46 | &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1); | ||
| 47 | &muh($a2,$word,($h2)=&NR(1)); | ||
| 48 | &add($r0,$cc,$r0); | ||
| 49 | &add($h0,$t0,$h0); &FR($t0); | ||
| 50 | &cmpult($r0,$cc,$cc); | ||
| 51 | &add($h1,$t1,$h1); &FR($t1); | ||
| 52 | &add($h0,$cc,$cc); &FR($h0); | ||
| 53 | &mul($a2,$word,($l2)=&NR(1)); &FR($a2); | ||
| 54 | &add($r1,$cc,$r1); | ||
| 55 | &cmpult($r1,$cc,$cc); | ||
| 56 | &add($r2,$l2,$r2); | ||
| 57 | &add($h1,$cc,$cc); &FR($h1); | ||
| 58 | &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2); | ||
| 59 | &muh($a3,$word,($h3)=&NR(1)); | ||
| 60 | &add($r2,$cc,$r2); | ||
| 61 | &st($r0,&QWPw(0,$rp)); &FR($r0); | ||
| 62 | &add($h2,$t2,$h2); &FR($t2); | ||
| 63 | &st($r1,&QWPw(1,$rp)); &FR($r1); | ||
| 64 | &cmpult($r2,$cc,$cc); | ||
| 65 | &mul($a3,$word,($l3)=&NR(1)); &FR($a3); | ||
| 66 | &add($h2,$cc,$cc); &FR($h2); | ||
| 67 | &st($r2,&QWPw(2,$rp)); &FR($r2); | ||
| 68 | &sub($count,4,$count); # count-=4 | ||
| 69 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 70 | &add($r3,$l3,$r3); | ||
| 71 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 72 | &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3); | ||
| 73 | &add($r3,$cc,$r3); | ||
| 74 | &add($h3,$t3,$h3); &FR($t3); | ||
| 75 | &cmpult($r3,$cc,$cc); | ||
| 76 | &st($r3,&QWPw(-1,$rp)); &FR($r3); | ||
| 77 | &add($h3,$cc,$cc); &FR($h3); | ||
| 78 | |||
| 79 | ### | ||
| 80 | &blt($count,&label("finish")); | ||
| 81 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); | ||
| 82 | &br(&label("loop")); | ||
| 83 | EOF | ||
| 84 | ################################################## | ||
| 85 | # Word-at-a-time loop; handles every word while the unrolled loop above is disabled | ||
| 86 | |||
| 87 | &set_label("last_loop"); | ||
| 88 | |||
| 89 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 90 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get r | ||
| 91 | ### | ||
| 92 | ### | ||
| 93 | &muh($a0,$word,($h0)=&NR(1)); # $a0 stays allocated: the mul below still reads it | ||
| 94 | ### wait 8 | ||
| 95 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
| 96 | &add($rp,$QWS,$rp); | ||
| 97 | &add($ap,$QWS,$ap); | ||
| 98 | &sub($count,1,$count); | ||
| 99 | ### wait 3 until l0 is available | ||
| 100 | &add($r0,$l0,$r0); | ||
| 101 | ### | ||
| 102 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); | ||
| 103 | &add($r0,$cc,$r0); | ||
| 104 | &add($h0,$t0,$h0); &FR($t0); | ||
| 105 | &cmpult($r0,$cc,$cc); | ||
| 106 | &add($h0,$cc,$cc); &FR($h0); | ||
| 107 | |||
| 108 | &st($r0,&QWPw(-1,$rp)); &FR($r0); | ||
| 109 | &bgt($count,&label("last_loop")); | ||
| 110 | &function_end_A($name); | ||
| 111 | |||
| 112 | ###################################################### | ||
| 113 | &set_label("finish"); | ||
| 114 | &add($count,4,$count); | ||
| 115 | &bgt($count,&label("last_loop")); | ||
| 116 | |||
| 117 | &set_label("end"); | ||
| 118 | &function_end($name); | ||
| 119 | |||
| 120 | &fin_pool; | ||
| 121 | } | ||
| 122 | |||
| 123 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl new file mode 100644 index 0000000000..9cc876ded4 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl | |||
| @@ -0,0 +1,215 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | # upto | ||
| 5 | # mul_add_c: emits code adding the product $a*$b into the three-word accumulator ($c0,$c1,$c2). | ||
| 6 | sub mul_add_c | ||
| 7 | { | ||
| 8 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 9 | local($l1,$h1,$t1,$t2); | ||
| 10 | # assumes &mul/&muh emit the low/high product halves and &cmpult an unsigned less-than -- TODO confirm against the helper definitions | ||
| 11 | &mul($a,$b,($l1)=&NR(1)); | ||
| 12 | &muh($a,$b,($h1)=&NR(1)); | ||
| 13 | &add($c0,$l1,$c0); | ||
| 14 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # $t1 = carry out of $c0 | ||
| 15 | &add($t1,$h1,$h1); &FR($t1); # fold that carry into the high half before touching $c1 | ||
| 16 | &add($c1,$h1,$c1); | ||
| 17 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # $t2 = carry out of $c1 | ||
| 18 | &add($c2,$t2,$c2); &FR($t2); | ||
| 19 | } | ||
| 20 | # bn_mul_comba4: emits a hand-scheduled 4x4 word multiply, r[0..7] = a[0..3]*b[0..3], built column by column. | ||
| 21 | sub bn_mul_comba4 | ||
| 22 | { | ||
| 23 | local($name)=@_; | ||
| 24 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 25 | # $R holds the running column sum; $H1/$H2 count the carries that seed the next column. | ||
| 26 | $cnt=1; | ||
| 27 | &init_pool(3); | ||
| 28 | |||
| 29 | $rp=&wparam(0); | ||
| 30 | $ap=&wparam(1); | ||
| 31 | $bp=&wparam(2); | ||
| 32 | |||
| 33 | &function_begin($name,""); | ||
| 34 | |||
| 35 | &comment(""); | ||
| 36 | |||
| 37 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 38 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 39 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 40 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 41 | &mul($a[0],$b[0],($r00)=&NR(1)); | ||
| 42 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 43 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 44 | &muh($a[0],$b[0],($r01)=&NR(1)); | ||
| 45 | &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 46 | &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
| 47 | &mul($a[0],$b[1],($r02)=&NR(1)); | ||
| 48 | |||
| 49 | ($R,$H1,$H2)=&NR(3); | ||
| 50 | |||
| 51 | &st($r00,&QWPw(0,$rp)); &FR($r00); | ||
| 52 | # ---- column 1: hi(a0*b0) + lo(a0*b1) + lo(a1*b0) | ||
| 53 | &mov("zero",$R); | ||
| 54 | &mul($a[1],$b[0],($r03)=&NR(1)); | ||
| 55 | |||
| 56 | &mov("zero",$H1); | ||
| 57 | &mov("zero",$H2); # was $H0, which is never allocated; $H2 is accumulated into below | ||
| 58 | &add($R,$r01,$R); | ||
| 59 | &muh($a[0],$b[1],($r04)=&NR(1)); | ||
| 60 | &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); | ||
| 61 | &add($R,$r02,$R); | ||
| 62 | &add($H1,$t01,$H1); &FR($t01); | ||
| 63 | &muh($a[1],$b[0],($r05)=&NR(1)); | ||
| 64 | &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); | ||
| 65 | &add($R,$r03,$R); | ||
| 66 | &add($H2,$t02,$H2); &FR($t02); | ||
| 67 | &mul($a[0],$b[2],($r06)=&NR(1)); | ||
| 68 | &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); | ||
| 69 | &add($H1,$t03,$H1); &FR($t03); | ||
| 70 | &st($R,&QWPw(1,$rp)); | ||
| 71 | &add($H1,$H2,$R); | ||
| 72 | # ---- column 2: hi(a0*b1) + hi(a1*b0) + lo(a0*b2) + lo(a1*b1) + lo(a2*b0) | ||
| 73 | &mov("zero",$H1); | ||
| 74 | &add($R,$r04,$R); | ||
| 75 | &mov("zero",$H2); | ||
| 76 | &mul($a[1],$b[1],($r07)=&NR(1)); | ||
| 77 | &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); | ||
| 78 | &add($R,$r05,$R); | ||
| 79 | &add($H1,$t04,$H1); &FR($t04); | ||
| 80 | &mul($a[2],$b[0],($r08)=&NR(1)); | ||
| 81 | &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); | ||
| 82 | &add($R,$r06,$R); # was $r01 (already released); lo(a0*b2) lives in $r06 | ||
| 83 | &add($H2,$t05,$H2); &FR($t05); | ||
| 84 | &muh($a[0],$b[2],($r09)=&NR(1)); | ||
| 85 | &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); | ||
| 86 | &add($R,$r07,$R); | ||
| 87 | &add($H1,$t06,$H1); &FR($t06); | ||
| 88 | &muh($a[1],$b[1],($r10)=&NR(1)); | ||
| 89 | &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); | ||
| 90 | &add($R,$r08,$R); | ||
| 91 | &add($H2,$t07,$H2); &FR($t07); | ||
| 92 | &muh($a[2],$b[0],($r11)=&NR(1)); | ||
| 93 | &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); | ||
| 94 | &add($H1,$t08,$H1); &FR($t08); | ||
| 95 | &st($R,&QWPw(2,$rp)); | ||
| 96 | &add($H1,$H2,$R); | ||
| 97 | # ---- column 3: three high halves from column 2's products plus lo of a0*b3, a1*b2, a2*b1, a3*b0 | ||
| 98 | &mov("zero",$H1); | ||
| 99 | &add($R,$r09,$R); | ||
| 100 | &mov("zero",$H2); | ||
| 101 | &mul($a[0],$b[3],($r12)=&NR(1)); | ||
| 102 | &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); | ||
| 103 | &add($R,$r10,$R); | ||
| 104 | &add($H1,$t09,$H1); &FR($t09); | ||
| 105 | &mul($a[1],$b[2],($r13)=&NR(1)); | ||
| 106 | &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); | ||
| 107 | &add($R,$r11,$R); | ||
| 108 | &add($H1,$t10,$H1); &FR($t10); | ||
| 109 | &mul($a[2],$b[1],($r14)=&NR(1)); | ||
| 110 | &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); | ||
| 111 | &add($R,$r12,$R); | ||
| 112 | &add($H1,$t11,$H1); &FR($t11); | ||
| 113 | &mul($a[3],$b[0],($r15)=&NR(1)); | ||
| 114 | &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); | ||
| 115 | &add($R,$r13,$R); | ||
| 116 | &add($H1,$t12,$H1); &FR($t12); | ||
| 117 | &muh($a[0],$b[3],($r16)=&NR(1)); | ||
| 118 | &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); | ||
| 119 | &add($R,$r14,$R); | ||
| 120 | &add($H1,$t13,$H1); &FR($t13); | ||
| 121 | &muh($a[1],$b[2],($r17)=&NR(1)); | ||
| 122 | &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); | ||
| 123 | &add($R,$r15,$R); | ||
| 124 | &add($H1,$t14,$H1); &FR($t14); | ||
| 125 | &muh($a[2],$b[1],($r18)=&NR(1)); | ||
| 126 | &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); | ||
| 127 | &add($H1,$t15,$H1); &FR($t15); | ||
| 128 | &st($R,&QWPw(3,$rp)); | ||
| 129 | &add($H1,$H2,$R); | ||
| 130 | # ---- column 4: four high halves from column 3's products plus lo of a1*b3, a2*b2, a3*b1 | ||
| 131 | &mov("zero",$H1); | ||
| 132 | &add($R,$r16,$R); | ||
| 133 | &mov("zero",$H2); | ||
| 134 | &muh($a[3],$b[0],($r19)=&NR(1)); | ||
| 135 | &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); | ||
| 136 | &add($R,$r17,$R); | ||
| 137 | &add($H1,$t16,$H1); &FR($t16); | ||
| 138 | &mul($a[1],$b[3],($r20)=&NR(1)); | ||
| 139 | &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); | ||
| 140 | &add($R,$r18,$R); | ||
| 141 | &add($H1,$t17,$H1); &FR($t17); | ||
| 142 | &mul($a[2],$b[2],($r21)=&NR(1)); | ||
| 143 | &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); | ||
| 144 | &add($R,$r19,$R); | ||
| 145 | &add($H1,$t18,$H1); &FR($t18); | ||
| 146 | &mul($a[3],$b[1],($r22)=&NR(1)); | ||
| 147 | &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); | ||
| 148 | &add($R,$r20,$R); | ||
| 149 | &add($H1,$t19,$H1); &FR($t19); | ||
| 150 | &muh($a[1],$b[3],($r23)=&NR(1)); | ||
| 151 | &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); | ||
| 152 | &add($R,$r21,$R); | ||
| 153 | &add($H1,$t20,$H1); &FR($t20); | ||
| 154 | &muh($a[2],$b[2],($r24)=&NR(1)); | ||
| 155 | &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); | ||
| 156 | &add($R,$r22,$R); | ||
| 157 | &add($H1,$t21,$H1); &FR($t21); | ||
| 158 | &muh($a[3],$b[1],($r25)=&NR(1)); | ||
| 159 | &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); | ||
| 160 | &add($H1,$t22,$H1); &FR($t22); | ||
| 161 | &st($R,&QWPw(4,$rp)); | ||
| 162 | &add($H1,$H2,$R); | ||
| 163 | # ---- column 5: three high halves from column 4's products plus lo of a2*b3, a3*b2 | ||
| 164 | &mov("zero",$H1); | ||
| 165 | &add($R,$r23,$R); | ||
| 166 | &mov("zero",$H2); | ||
| 167 | &mul($a[2],$b[3],($r26)=&NR(1)); | ||
| 168 | &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); | ||
| 169 | &add($R,$r24,$R); | ||
| 170 | &add($H1,$t23,$H1); &FR($t23); | ||
| 171 | &mul($a[3],$b[2],($r27)=&NR(1)); | ||
| 172 | &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); | ||
| 173 | &add($R,$r25,$R); | ||
| 174 | &add($H1,$t24,$H1); &FR($t24); | ||
| 175 | &muh($a[2],$b[3],($r28)=&NR(1)); | ||
| 176 | &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); | ||
| 177 | &add($R,$r26,$R); | ||
| 178 | &add($H1,$t25,$H1); &FR($t25); | ||
| 179 | &muh($a[3],$b[2],($r29)=&NR(1)); | ||
| 180 | &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); | ||
| 181 | &add($R,$r27,$R); | ||
| 182 | &add($H1,$t26,$H1); &FR($t26); | ||
| 183 | &mul($a[3],$b[3],($r30)=&NR(1)); | ||
| 184 | &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); | ||
| 185 | &add($H1,$t27,$H1); &FR($t27); | ||
| 186 | &st($R,&QWPw(5,$rp)); | ||
| 187 | &add($H1,$H2,$R); | ||
| 188 | # ---- column 6: hi(a2*b3) + hi(a3*b2) + lo(a3*b3) | ||
| 189 | &mov("zero",$H1); | ||
| 190 | &add($R,$r28,$R); | ||
| 191 | &mov("zero",$H2); | ||
| 192 | &muh($a[3],$b[3],($r31)=&NR(1)); | ||
| 193 | &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); | ||
| 194 | &add($R,$r29,$R); | ||
| 195 | &add($H1,$t28,$H1); &FR($t28); | ||
| 196 | ############ | ||
| 197 | &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); | ||
| 198 | &add($R,$r30,$R); | ||
| 199 | &add($H1,$t29,$H1); &FR($t29); | ||
| 200 | ############ | ||
| 201 | &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); | ||
| 202 | &add($H1,$t30,$H1); &FR($t30); | ||
| 203 | &st($R,&QWPw(6,$rp)); | ||
| 204 | &add($H1,$H2,$R); | ||
| 205 | # ---- column 7: carries from column 6 plus hi(a3*b3) | ||
| 206 | &add($R,$r31,$R); &FR($r31); | ||
| 207 | &st($R,&QWPw(7,$rp)); | ||
| 208 | |||
| 209 | &FR($R,$H1,$H2); | ||
| 210 | &function_end($name); | ||
| 211 | |||
| 212 | &fin_pool; | ||
| 213 | } | ||
| 214 | |||
| 215 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl new file mode 100644 index 0000000000..79d86dd25c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | # mul_add_c: emits code adding the product $a*$b into the three-word accumulator ($c0,$c1,$c2). | ||
| 4 | sub mul_add_c | ||
| 5 | { | ||
| 6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | # Progress trace on STDERR at generation time; $cnt is the global counter reset by bn_mul_comba4. | ||
| 9 | print STDERR "count=$cnt\n"; $cnt++; | ||
| 10 | &mul($a,$b,($l1)=&NR(1)); | ||
| 11 | &muh($a,$b,($h1)=&NR(1)); | ||
| 12 | &add($c0,$l1,$c0); | ||
| 13 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # $t1 = carry out of $c0 (assuming &cmpult is an unsigned less-than) | ||
| 14 | &add($t1,$h1,$h1); &FR($t1); # fold that carry into the high half before touching $c1 | ||
| 15 | &add($c1,$h1,$c1); | ||
| 16 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # $t2 = carry out of $c1 | ||
| 17 | &add($c2,$t2,$c2); &FR($t2); | ||
| 18 | } | ||
| 19 | # bn_mul_comba4: emits a 4x4 word multiply, r[0..7] = a[0..3]*b[0..3], one result column at a time via mul_add_c. | ||
| 20 | sub bn_mul_comba4 | ||
| 21 | { | ||
| 22 | local($name)=@_; | ||
| 23 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 24 | # $cnt is the global counter that mul_add_c prints and increments. | ||
| 25 | $cnt=1; | ||
| 26 | &init_pool(3); | ||
| 27 | |||
| 28 | $rp=&wparam(0); | ||
| 29 | $ap=&wparam(1); | ||
| 30 | $bp=&wparam(2); | ||
| 31 | |||
| 32 | &function_begin($name,""); | ||
| 33 | |||
| 34 | &comment(""); | ||
| 35 | # Load all eight input words up front; the pointer registers are released after their last use. | ||
| 36 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 37 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 38 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 39 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 42 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
| 43 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); | ||
| 44 | # Column 0 is just a0*b0. NOTE(review): &NR($c0) passes a register where the sibling mul_c8.pl passes a count of 1 -- confirm against NR. | ||
| 45 | ($c0,$c1,$c2)=&NR(3); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | &mul($a[0],$b[0],$c0); | ||
| 48 | &muh($a[0],$b[0],$c1); | ||
| 49 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 50 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 51 | &mov("zero",$c2); | ||
| 52 | # After each store the accumulator names rotate: old $c1 becomes $c0, old $c2 becomes $c1, and the new $c2 is cleared. | ||
| 53 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
| 54 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
| 55 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 56 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 57 | &mov("zero",$c2); | ||
| 58 | |||
| 59 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
| 60 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
| 61 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
| 62 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 63 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 64 | &mov("zero",$c2); | ||
| 65 | # From column 3 on, each input word's register is released right after its final product. | ||
| 66 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); | ||
| 67 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
| 68 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
| 69 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
| 70 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 71 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 72 | &mov("zero",$c2); | ||
| 73 | |||
| 74 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); | ||
| 75 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
| 76 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
| 77 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 78 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 79 | &mov("zero",$c2); | ||
| 80 | |||
| 81 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); | ||
| 82 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
| 83 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
| 84 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 85 | &mov("zero",$c2); | ||
| 86 | # Last column: the low word goes to r[6] and the remaining accumulator word to r[7]. | ||
| 87 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); | ||
| 88 | &st($c0,&QWPw(6,$rp)); | ||
| 89 | &st($c1,&QWPw(7,$rp)); | ||
| 90 | |||
| 91 | &FR($c0,$c1,$c2); | ||
| 92 | |||
| 93 | &function_end($name); | ||
| 94 | |||
| 95 | &fin_pool; | ||
| 96 | } | ||
| 97 | |||
| 98 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl new file mode 100644 index 0000000000..525ca7494b --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl | |||
| @@ -0,0 +1,177 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | # bn_mul_comba8: emits an 8x8 word multiply, r[0..15] = a[0..7]*b[0..7], one result column at a time via mul_add_c. | ||
| 4 | sub bn_mul_comba8 | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 8 | # mul_add_c is not defined in this file; it must be supplied by a sibling script such as mul_c4.pl. | ||
| 9 | $cnt=1; | ||
| 10 | &init_pool(3); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | |||
| 16 | &function_begin($name,""); | ||
| 17 | |||
| 18 | &comment(""); | ||
| 19 | # $reg_s0/$reg_s1 are spilled to two stack slots, released to the pool, and reloaded before returning. | ||
| 20 | &stack_push(2); | ||
| 21 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 22 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
| 23 | &st($reg_s0,&swtmp(0)); &FR($reg_s0); | ||
| 24 | &st($reg_s1,&swtmp(1)); &FR($reg_s1); | ||
| 25 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 26 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
| 27 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 28 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
| 29 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 30 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
| 31 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); # words 4..7 were all being read from offset 1 | ||
| 32 | &ld(($b[4])=&NR(1),&QWPw(4,$bp)); | ||
| 33 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
| 34 | &ld(($b[5])=&NR(1),&QWPw(5,$bp)); | ||
| 35 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
| 36 | &ld(($b[6])=&NR(1),&QWPw(6,$bp)); | ||
| 37 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
| 38 | &ld(($b[7])=&NR(1),&QWPw(7,$bp)); &FR($bp); | ||
| 39 | # Column 0 is just a0*b0; after each store the accumulator names rotate and the new top word is cleared. | ||
| 40 | ($c0,$c1,$c2)=&NR(3); | ||
| 41 | &mov("zero",$c2); | ||
| 42 | &mul($a[0],$b[0],$c0); | ||
| 43 | &muh($a[0],$b[0],$c1); | ||
| 44 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | |||
| 48 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
| 49 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
| 50 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 52 | &mov("zero",$c2); | ||
| 53 | |||
| 54 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
| 55 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
| 56 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
| 57 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 59 | &mov("zero",$c2); | ||
| 60 | |||
| 61 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); | ||
| 62 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
| 63 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
| 64 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); | ||
| 65 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 66 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 67 | &mov("zero",$c2); | ||
| 68 | |||
| 69 | &mul_add_c($a[0],$b[4],$c0,$c1,$c2); | ||
| 70 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); | ||
| 71 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
| 72 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); | ||
| 73 | &mul_add_c($a[4],$b[0],$c0,$c1,$c2); | ||
| 74 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 75 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 76 | &mov("zero",$c2); | ||
| 77 | |||
| 78 | &mul_add_c($a[0],$b[5],$c0,$c1,$c2); | ||
| 79 | &mul_add_c($a[1],$b[4],$c0,$c1,$c2); | ||
| 80 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); | ||
| 81 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); | ||
| 82 | &mul_add_c($a[4],$b[1],$c0,$c1,$c2); | ||
| 83 | &mul_add_c($a[5],$b[0],$c0,$c1,$c2); | ||
| 84 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 85 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 86 | &mov("zero",$c2); | ||
| 87 | |||
| 88 | &mul_add_c($a[0],$b[6],$c0,$c1,$c2); | ||
| 89 | &mul_add_c($a[1],$b[5],$c0,$c1,$c2); | ||
| 90 | &mul_add_c($a[2],$b[4],$c0,$c1,$c2); | ||
| 91 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); | ||
| 92 | &mul_add_c($a[4],$b[2],$c0,$c1,$c2); | ||
| 93 | &mul_add_c($a[5],$b[1],$c0,$c1,$c2); | ||
| 94 | &mul_add_c($a[6],$b[0],$c0,$c1,$c2); | ||
| 95 | &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 97 | &mov("zero",$c2); | ||
| 98 | # From column 7 on, each input word's register is released right after its final product. | ||
| 99 | &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); | ||
| 100 | &mul_add_c($a[1],$b[6],$c0,$c1,$c2); | ||
| 101 | &mul_add_c($a[2],$b[5],$c0,$c1,$c2); | ||
| 102 | &mul_add_c($a[3],$b[4],$c0,$c1,$c2); | ||
| 103 | &mul_add_c($a[4],$b[3],$c0,$c1,$c2); | ||
| 104 | &mul_add_c($a[5],$b[2],$c0,$c1,$c2); | ||
| 105 | &mul_add_c($a[6],$b[1],$c0,$c1,$c2); | ||
| 106 | &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
| 107 | &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 108 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 109 | &mov("zero",$c2); | ||
| 110 | |||
| 111 | &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); | ||
| 112 | &mul_add_c($a[2],$b[6],$c0,$c1,$c2); | ||
| 113 | &mul_add_c($a[3],$b[5],$c0,$c1,$c2); | ||
| 114 | &mul_add_c($a[4],$b[4],$c0,$c1,$c2); | ||
| 115 | &mul_add_c($a[5],$b[3],$c0,$c1,$c2); | ||
| 116 | &mul_add_c($a[6],$b[2],$c0,$c1,$c2); | ||
| 117 | &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
| 118 | &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 119 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 120 | &mov("zero",$c2); | ||
| 121 | |||
| 122 | &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); | ||
| 123 | &mul_add_c($a[3],$b[6],$c0,$c1,$c2); | ||
| 124 | &mul_add_c($a[4],$b[5],$c0,$c1,$c2); | ||
| 125 | &mul_add_c($a[5],$b[4],$c0,$c1,$c2); | ||
| 126 | &mul_add_c($a[6],$b[3],$c0,$c1,$c2); | ||
| 127 | &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
| 128 | &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 129 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 130 | &mov("zero",$c2); | ||
| 131 | |||
| 132 | &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); | ||
| 133 | &mul_add_c($a[4],$b[6],$c0,$c1,$c2); | ||
| 134 | &mul_add_c($a[5],$b[5],$c0,$c1,$c2); | ||
| 135 | &mul_add_c($a[6],$b[4],$c0,$c1,$c2); | ||
| 136 | &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); | ||
| 137 | &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 138 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 139 | &mov("zero",$c2); | ||
| 140 | |||
| 141 | &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); | ||
| 142 | &mul_add_c($a[5],$b[6],$c0,$c1,$c2); | ||
| 143 | &mul_add_c($a[6],$b[5],$c0,$c1,$c2); | ||
| 144 | &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); | ||
| 145 | &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 146 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 147 | &mov("zero",$c2); | ||
| 148 | |||
| 149 | &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); | ||
| 150 | &mul_add_c($a[6],$b[6],$c0,$c1,$c2); | ||
| 151 | &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); | ||
| 152 | &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 153 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 154 | &mov("zero",$c2); | ||
| 155 | |||
| 156 | &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); | ||
| 157 | &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); | ||
| 158 | &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); | ||
| 159 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 160 | &mov("zero",$c2); | ||
| 161 | # Last column: the low word goes to r[14] and the remaining accumulator word to r[15]. | ||
| 162 | &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); | ||
| 163 | &st($c0,&QWPw(14,$rp)); | ||
| 164 | &st($c1,&QWPw(15,$rp)); | ||
| 165 | |||
| 166 | &FR($c0,$c1,$c2); | ||
| 167 | |||
| 168 | &ld($reg_s0,&swtmp(0)); | ||
| 169 | &ld($reg_s1,&swtmp(1)); | ||
| 170 | &stack_pop(2); | ||
| 171 | |||
| 172 | &function_end($name); | ||
| 173 | |||
| 174 | &fin_pool; | ||
| 175 | } | ||
| 176 | |||
| 177 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr.pl b/src/lib/libcrypto/bn/asm/alpha/sqr.pl new file mode 100644 index 0000000000..a55b696906 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr.pl | |||
| @@ -0,0 +1,113 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | # bn_sqr_words: emits a loop storing the two-word square of each ap[i] into rp[2i] (low) and rp[2i+1] (high). | ||
| 4 | sub bn_sqr_words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r,$couny); | ||
| 8 | # NOTE(review): "$couny" looks like a typo for $count, so $count is not localized here -- confirm before changing. | ||
| 9 | &init_pool(3); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $count=&wparam(2); | ||
| 15 | |||
| 16 | &function_begin($name,""); | ||
| 17 | |||
| 18 | &comment(""); | ||
| 19 | &sub($count,4,$count); | ||
| 20 | &mov("zero",$cc); | ||
| 21 | &br(&label("finish")); # unconditional: the blt and the two loads below are emitted but never reached | ||
| 22 | &blt($count,&label("finish")); | ||
| 23 | |||
| 24 | ($a0,$r0)=&NR(2); | ||
| 25 | &ld($a0,&QWPw(0,$ap)); | ||
| 26 | &ld($r0,&QWPw(0,$rp)); | ||
| 27 | # Everything up to EOF is only a string assigned to $a, which is not used again in this sub, so no unrolled loop is emitted. | ||
| 28 | $a=<<'EOF'; | ||
| 29 | ########################################################## | ||
| 30 | &set_label("loop"); | ||
| 31 | |||
| 32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
| 33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
| 34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
| 35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
| 36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
| 37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
| 38 | |||
| 39 | ($o0,$t0)=&NR(2); | ||
| 40 | &add($a0,$b0,$o0); | ||
| 41 | &cmpult($o0,$b0,$t0); | ||
| 42 | &add($o0,$cc,$o0); | ||
| 43 | &cmpult($o0,$cc,$cc); | ||
| 44 | &add($cc,$t0,$cc); &FR($t0); | ||
| 45 | |||
| 46 | ($t1,$o1)=&NR(2); | ||
| 47 | |||
| 48 | &add($a1,$b1,$o1); &FR($a1); | ||
| 49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
| 50 | &add($o1,$cc,$o1); | ||
| 51 | &cmpult($o1,$cc,$cc); | ||
| 52 | &add($cc,$t1,$cc); &FR($t1); | ||
| 53 | |||
| 54 | ($t2,$o2)=&NR(2); | ||
| 55 | |||
| 56 | &add($a2,$b2,$o2); &FR($a2); | ||
| 57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
| 58 | &add($o2,$cc,$o2); | ||
| 59 | &cmpult($o2,$cc,$cc); | ||
| 60 | &add($cc,$t2,$cc); &FR($t2); | ||
| 61 | |||
| 62 | ($t3,$o3)=&NR(2); | ||
| 63 | |||
| 64 | &add($a3,$b3,$o3); &FR($a3); | ||
| 65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
| 66 | &add($o3,$cc,$o3); | ||
| 67 | &cmpult($o3,$cc,$cc); | ||
| 68 | &add($cc,$t3,$cc); &FR($t3); | ||
| 69 | |||
| 70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
| 71 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
| 72 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
| 73 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
| 74 | |||
| 75 | &sub($count,4,$count); # count-=4 | ||
| 76 | &add($ap,4*$QWS,$ap); # count+=4 | ||
| 77 | &add($bp,4*$QWS,$bp); # count+=4 | ||
| 78 | &add($rp,4*$QWS,$rp); # count+=4 | ||
| 79 | |||
| 80 | &blt($count,&label("finish")); | ||
| 81 | &ld($a0,&QWPw(0,$ap)); | ||
| 82 | &ld($b0,&QWPw(0,$bp)); | ||
| 83 | &br(&label("loop")); | ||
| 84 | EOF | ||
| 85 | ################################################## | ||
| 86 | # Do the last 0..3 words | ||
| 87 | |||
| 88 | &set_label("last_loop"); | ||
| 89 | |||
| 90 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
| 91 | &mul($a0,$a0,($l0)=&NR(1)); | ||
| 92 | &add($ap,$QWS,$ap); | ||
| 93 | &add($rp,2*$QWS,$rp); # each input word produces two result words | ||
| 94 | &sub($count,1,$count); | ||
| 95 | &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); | ||
| 96 | &st($l0,&QWPw(-2,$rp)); &FR($l0); # negative offsets because $rp was already advanced | ||
| 97 | &st($h0,&QWPw(-1,$rp)); &FR($h0); | ||
| 98 | |||
| 99 | &bgt($count,&label("last_loop")); | ||
| 100 | &function_end_A($name); | ||
| 101 | |||
| 102 | ###################################################### | ||
| 103 | &set_label("finish"); | ||
| 104 | &add($count,4,$count); # undo the count-=4 done at entry | ||
| 105 | &bgt($count,&label("last_loop")); | ||
| 106 | |||
| 107 | &set_label("end"); | ||
| 108 | &function_end($name); | ||
| 109 | |||
| 110 | &fin_pool; | ||
| 111 | } | ||
| 112 | |||
| 113 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl new file mode 100644 index 0000000000..bf33f5b503 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | # sqr_add_c: emits code adding the square $a*$a into the three-word accumulator ($c0,$c1,$c2). | ||
| 4 | sub sqr_add_c | ||
| 5 | { | ||
| 6 | local($a,$c0,$c1,$c2)=@_; | ||
| 7 | local($l1,$h1,$t1,$t2); | ||
| 8 | # The low-word carry goes into $h1 BEFORE $c1 is updated, so the $c1 overflow test also covers that carry. | ||
| 9 | &mul($a,$a,($l1)=&NR(1)); | ||
| 10 | &muh($a,$a,($h1)=&NR(1)); | ||
| 11 | &add($c0,$l1,$c0); | ||
| 12 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # $t1 = carry out of $c0 | ||
| 13 | &add($h1,$t1,$h1); &FR($t1); # cannot wrap: the high word of a product is at most 2^64-2 (64-bit words assumed) | ||
| 14 | &add($c1,$h1,$c1); | ||
| 15 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # $t2 = carry out of $c1 | ||
| 16 | &add($c2,$t2,$c2); &FR($t2); | ||
| 17 | } | ||
| 18 | # sqr_add_c2: emits code adding 2*$a*$b (a doubled cross product) into the accumulator ($c0,$c1,$c2). | ||
| 19 | sub sqr_add_c2 | ||
| 20 | { | ||
| 21 | local($a,$b,$c0,$c1,$c2)=@_; | ||
| 22 | local($l1,$h1,$lc1,$hc1); | ||
| 23 | # Double the two-word product first: the top bit of each half is captured (signed compare with zero) before the shift-by-add. | ||
| 24 | &mul($a,$b,($l1)=&NR(1)); | ||
| 25 | &muh($a,$b,($h1)=&NR(1)); | ||
| 26 | &cmplt($l1,"zero",($lc1)=&NR(1)); | ||
| 27 | &cmplt($h1,"zero",($hc1)=&NR(1)); | ||
| 28 | &add($l1,$l1,$l1); | ||
| 29 | &add($h1,$h1,$h1); | ||
| 30 | &add($h1,$lc1,$h1); &FR($lc1); | ||
| 31 | &add($c2,$hc1,$c2); &FR($hc1); | ||
| 32 | |||
| 33 | &add($c0,$l1,$c0); | ||
| 34 | &add($c1,$h1,$c1); | ||
| 35 | &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); | ||
| 36 | &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); | ||
| 37 | # Adding the low-word carry to $c1 can itself wrap (doubled $h1 may be all ones), so that carry is detected and sent to $c2 too. | ||
| 38 | &add($c1,$lc1,$c1); &cmpult($c1,$lc1,$lc1); | ||
| 39 | &add($c2,$hc1,$c2); &FR($hc1); &add($c2,$lc1,$c2); &FR($lc1); | ||
| 40 | } | ||
| 41 | |||
| 42 | # bn_sqr_comba4: emits a 4-word squaring, r[0..7] = a[0..3]^2, one result column at a time. | ||
| 43 | sub bn_sqr_comba4 | ||
| 44 | { | ||
| 45 | local($name)=@_; | ||
| 46 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 47 | # Diagonal terms a[i]^2 go through sqr_add_c; off-diagonal pairs go through sqr_add_c2, which adds them doubled. | ||
| 48 | $cnt=1; | ||
| 49 | &init_pool(2); | ||
| 50 | |||
| 51 | $rp=&wparam(0); | ||
| 52 | $ap=&wparam(1); | ||
| 53 | |||
| 54 | &function_begin($name,""); | ||
| 55 | |||
| 56 | &comment(""); | ||
| 57 | |||
| 58 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 59 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 60 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 61 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
| 62 | |||
| 63 | ($c0,$c1,$c2)=&NR(3); | ||
| 64 | # Column 0 is just a0^2; after each store the three accumulator names rotate and the new top word is cleared. | ||
| 65 | &mov("zero",$c2); | ||
| 66 | &mul($a[0],$a[0],$c0); | ||
| 67 | &muh($a[0],$a[0],$c1); | ||
| 68 | &st($c0,&QWPw(0,$rp)); | ||
| 69 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 70 | &mov("zero",$c2); | ||
| 71 | |||
| 72 | &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); | ||
| 73 | &st($c0,&QWPw(1,$rp)); | ||
| 74 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 75 | &mov("zero",$c2); | ||
| 76 | |||
| 77 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
| 78 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
| 79 | &st($c0,&QWPw(2,$rp)); | ||
| 80 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 81 | &mov("zero",$c2); | ||
| 82 | |||
| 83 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
| 84 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
| 85 | &st($c0,&QWPw(3,$rp)); | ||
| 86 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 87 | &mov("zero",$c2); | ||
| 88 | |||
| 89 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
| 90 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
| 91 | &st($c0,&QWPw(4,$rp)); | ||
| 92 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 93 | &mov("zero",$c2); | ||
| 94 | |||
| 95 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
| 96 | &st($c0,&QWPw(5,$rp)); | ||
| 97 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 98 | &mov("zero",$c2); | ||
| 99 | # Last column: the low word goes to r[6] and the remaining accumulator word to r[7]. | ||
| 100 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
| 101 | &st($c0,&QWPw(6,$rp)); | ||
| 102 | &st($c1,&QWPw(7,$rp)); | ||
| 103 | |||
| 104 | &function_end($name); | ||
| 105 | |||
| 106 | &fin_pool; | ||
| 107 | } | ||
| 108 | |||
| 109 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl new file mode 100644 index 0000000000..b4afe085f1 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl | |||
| @@ -0,0 +1,132 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_sqr_comba8 # emit the 8-word comba squaring routine named $name | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
| 8 | # parameter 0 is used as the result pointer ($rp, 16 words stored), parameter 1 as the input pointer ($ap, 8 words loaded) | ||
| 9 | $cnt=1; | ||
| 10 | &init_pool(2); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | |||
| 15 | &function_begin($name,""); | ||
| 16 | |||
| 17 | &comment(""); | ||
| 18 | # load a[0..7] into fresh registers; $ap is handed to FR() after the last load (presumably freeing it for reuse - confirm in the register-pool helpers) | ||
| 19 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
| 20 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
| 21 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
| 22 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
| 23 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); | ||
| 24 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
| 25 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
| 26 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
| 27 | # ($c0,$c1,$c2) name the three registers of the running column sum; $c0 is the word stored for the current column | ||
| 28 | ($c0,$c1,$c2)=&NR(3); | ||
| 29 | # column 0: a[0]*a[0] via mul/muh (presumably the low and high halves of the product - helpers defined elsewhere) | ||
| 30 | &mov("zero",$c2); | ||
| 31 | &mul($a[0],$a[0],$c0); | ||
| 32 | &muh($a[0],$a[0],$c1); | ||
| 33 | &st($c0,&QWPw(0,$rp)); | ||
| 34 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the names: the register just stored becomes the new top word | ||
| 35 | &mov("zero",$c2); # ... and is cleared before the next column | ||
| 36 | # each following group handles one result column k: sqr_add_c for the a[k/2]^2 term, sqr_add_c2 for each cross pair a[i],a[j] with i+j == k (helpers defined elsewhere; presumably sqr_add_c2 adds the product twice) | ||
| 37 | &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); | ||
| 38 | &st($c0,&QWPw(1,$rp)); | ||
| 39 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 40 | &mov("zero",$c2); | ||
| 41 | |||
| 42 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
| 43 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
| 44 | &st($c0,&QWPw(2,$rp)); | ||
| 45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 46 | &mov("zero",$c2); | ||
| 47 | |||
| 48 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
| 49 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
| 50 | &st($c0,&QWPw(3,$rp)); | ||
| 51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 52 | &mov("zero",$c2); | ||
| 53 | |||
| 54 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
| 55 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
| 56 | &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); | ||
| 57 | &st($c0,&QWPw(4,$rp)); | ||
| 58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 59 | &mov("zero",$c2); | ||
| 60 | |||
| 61 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
| 62 | &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); | ||
| 63 | &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); | ||
| 64 | &st($c0,&QWPw(5,$rp)); | ||
| 65 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 66 | &mov("zero",$c2); | ||
| 67 | |||
| 68 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
| 69 | &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); | ||
| 70 | &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); | ||
| 71 | &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); | ||
| 72 | &st($c0,&QWPw(6,$rp)); | ||
| 73 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 74 | &mov("zero",$c2); | ||
| 75 | |||
| 76 | &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); | ||
| 77 | &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); | ||
| 78 | &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); | ||
| 79 | &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); | ||
| 80 | &st($c0,&QWPw(7,$rp)); | ||
| 81 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 82 | &mov("zero",$c2); | ||
| 83 | |||
| 84 | &sqr_add_c($a[4],$c0,$c1,$c2); | ||
| 85 | &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); | ||
| 86 | &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); | ||
| 87 | &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); | ||
| 88 | &st($c0,&QWPw(8,$rp)); | ||
| 89 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 90 | &mov("zero",$c2); | ||
| 91 | |||
| 92 | &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); | ||
| 93 | &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); | ||
| 94 | &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); | ||
| 95 | &st($c0,&QWPw(9,$rp)); | ||
| 96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 97 | &mov("zero",$c2); | ||
| 98 | |||
| 99 | &sqr_add_c($a[5],$c0,$c1,$c2); | ||
| 100 | &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); | ||
| 101 | &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); | ||
| 102 | &st($c0,&QWPw(10,$rp)); | ||
| 103 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 104 | &mov("zero",$c2); | ||
| 105 | |||
| 106 | &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); | ||
| 107 | &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); | ||
| 108 | &st($c0,&QWPw(11,$rp)); | ||
| 109 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 110 | &mov("zero",$c2); | ||
| 111 | |||
| 112 | &sqr_add_c($a[6],$c0,$c1,$c2); | ||
| 113 | &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); | ||
| 114 | &st($c0,&QWPw(12,$rp)); | ||
| 115 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 116 | &mov("zero",$c2); | ||
| 117 | |||
| 118 | &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); | ||
| 119 | &st($c0,&QWPw(13,$rp)); | ||
| 120 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 121 | &mov("zero",$c2); | ||
| 122 | # last column (14): only the a[7]^2 term; the two remaining words are stored as r[14] and r[15] | ||
| 123 | &sqr_add_c($a[7],$c0,$c1,$c2); | ||
| 124 | &st($c0,&QWPw(14,$rp)); | ||
| 125 | &st($c1,&QWPw(15,$rp)); | ||
| 126 | |||
| 127 | &function_end($name); | ||
| 128 | |||
| 129 | &fin_pool; | ||
| 130 | } | ||
| 131 | |||
| 132 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sub.pl b/src/lib/libcrypto/bn/asm/alpha/sub.pl new file mode 100644 index 0000000000..d998da5c21 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sub.pl | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # alpha assembler | ||
| 3 | |||
| 4 | sub bn_sub_words # emit the word-array subtract r[i] = a[i] - b[i] with borrow propagation | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | local($cc,$a,$b,$r); | ||
| 8 | # parameters 0..3 are used as the r, a and b pointers and the word count; $cc is register r0 and carries the borrow from word to word | ||
| 9 | &init_pool(4); | ||
| 10 | ($cc)=GR("r0"); | ||
| 11 | |||
| 12 | $rp=&wparam(0); | ||
| 13 | $ap=&wparam(1); | ||
| 14 | $bp=&wparam(2); | ||
| 15 | $count=&wparam(3); | ||
| 16 | |||
| 17 | &function_begin($name,""); | ||
| 18 | # count is biased by -4 up front: a negative value means fewer than 4 words are left, so the unrolled loop is skipped | ||
| 19 | &comment(""); | ||
| 20 | &sub($count,4,$count); | ||
| 21 | &mov("zero",$cc); | ||
| 22 | &blt($count,&label("finish")); | ||
| 23 | # preload a[0] and b[0] for the first pass of the unrolled loop | ||
| 24 | ($a0,$b0)=&NR(2); | ||
| 25 | &ld($a0,&QWPw(0,$ap)); | ||
| 26 | &ld($b0,&QWPw(0,$bp)); | ||
| 27 | |||
| 28 | ########################################################## | ||
| 29 | &set_label("loop"); | ||
| 30 | # four words per pass; loads and stores are interleaved with the borrow arithmetic | ||
| 31 | ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); | ||
| 32 | &ld($a1,&QWPw(1,$ap)); | ||
| 33 | &cmpult($a0,$b0,$tmp); # will a-b borrow? | ||
| 34 | &ld($b1,&QWPw(1,$bp)); | ||
| 35 | &sub($a0,$b0,$a0); # do the subtract | ||
| 36 | &ld($a2,&QWPw(2,$ap)); | ||
| 37 | &cmpult($a0,$cc,$b0); # will subtracting the incoming borrow borrow? | ||
| 38 | &ld($b2,&QWPw(2,$bp)); | ||
| 39 | &sub($a0,$cc,$o0); # subtract the incoming borrow | ||
| 40 | &ld($a3,&QWPw(3,$ap)); | ||
| 41 | &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); # add the borrows | ||
| 42 | |||
| 43 | &cmpult($a1,$b1,$t1); # will a-b borrow? | ||
| 44 | &sub($a1,$b1,$a1); # do the subtract | ||
| 45 | &ld($b3,&QWPw(3,$bp)); | ||
| 46 | &cmpult($a1,$cc,$b1); # will subtracting the incoming borrow borrow? | ||
| 47 | &sub($a1,$cc,$o1); # subtract the incoming borrow | ||
| 48 | &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); # add the borrows | ||
| 49 | |||
| 50 | &cmpult($a2,$b2,$tmp); # will a-b borrow? | ||
| 51 | &sub($a2,$b2,$a2); # do the subtract | ||
| 52 | &st($o0,&QWPw(0,$rp)); &FR($o0); # save | ||
| 53 | &cmpult($a2,$cc,$b2); # will subtracting the incoming borrow borrow? | ||
| 54 | &sub($a2,$cc,$o2); # subtract the incoming borrow | ||
| 55 | &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); # add the borrows | ||
| 56 | |||
| 57 | &cmpult($a3,$b3,$t3); # will a-b borrow? | ||
| 58 | &sub($a3,$b3,$a3); # do the subtract | ||
| 59 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
| 60 | &cmpult($a3,$cc,$b3); # will subtracting the incoming borrow borrow? | ||
| 61 | &sub($a3,$cc,$o3); # subtract the incoming borrow | ||
| 62 | &add($b3,$t3,$cc); &FR($t3,$a3,$b3); # add the borrows | ||
| 63 | |||
| 64 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
| 65 | &sub($count,4,$count); # count-=4 | ||
| 66 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
| 67 | &add($ap,4*$QWS,$ap); # advance a by 4 words | ||
| 68 | &add($bp,4*$QWS,$bp); # advance b by 4 words | ||
| 69 | &add($rp,4*$QWS,$rp); # advance r by 4 words | ||
| 70 | |||
| 71 | &blt($count,&label("finish")); | ||
| 72 | &ld($a0,&QWPw(0,$ap)); | ||
| 73 | &ld($b0,&QWPw(0,$bp)); | ||
| 74 | &br(&label("loop")); | ||
| 75 | ################################################## | ||
| 76 | # Do the last 0..3 words | ||
| 77 | |||
| 78 | &set_label("last_loop"); | ||
| 79 | |||
| 80 | &ld($a0,&QWPw(0,$ap)); # get a | ||
| 81 | &ld($b0,&QWPw(0,$bp)); # get b | ||
| 82 | &cmpult($a0,$b0,$tmp); # will a-b borrow? | ||
| 83 | &sub($a0,$b0,$a0); # do the subtract | ||
| 84 | &cmpult($a0,$cc,$b0); # will subtracting the incoming borrow borrow? | ||
| 85 | &sub($a0,$cc,$a0); # subtract the incoming borrow | ||
| 86 | &st($a0,&QWPw(0,$rp)); # save | ||
| 87 | &add($b0,$tmp,$cc); # add the borrows | ||
| 88 | |||
| 89 | &add($ap,$QWS,$ap); | ||
| 90 | &add($bp,$QWS,$bp); | ||
| 91 | &add($rp,$QWS,$rp); | ||
| 92 | &sub($count,1,$count); | ||
| 93 | &bgt($count,&label("last_loop")); | ||
| 94 | &function_end_A($name); # exit taken when the tail loop runs out (helper defined elsewhere; presumably emits a return without closing the function) | ||
| 95 | |||
| 96 | ###################################################### | ||
| 97 | &set_label("finish"); | ||
| 98 | &add($count,4,$count); # undo the -4 bias: count is now the 0..3 words still to do | ||
| 99 | &bgt($count,&label("last_loop")); | ||
| 100 | |||
| 101 | &FR($a0,$b0); | ||
| 102 | &set_label("end"); | ||
| 103 | &function_end($name); | ||
| 104 | |||
| 105 | &fin_pool; | ||
| 106 | } | ||
| 107 | |||
| 108 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/add.pl b/src/lib/libcrypto/bn/asm/x86/add.pl new file mode 100644 index 0000000000..0b5cf583e3 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/add.pl | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assembler | ||
| 3 | |||
| 4 | sub bn_add_words # emit the word-array add r[i] = a[i] + b[i] with carry propagation; the final carry is left in eax | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | |||
| 10 | &comment(""); | ||
| 11 | $a="esi"; | ||
| 12 | $b="edi"; | ||
| 13 | $c="eax"; | ||
| 14 | $r="ebx"; | ||
| 15 | $tmp1="ecx"; | ||
| 16 | $tmp2="edx"; | ||
| 17 | $num="ebp"; | ||
| 18 | |||
| 19 | &mov($r,&wparam(0)); # get r | ||
| 20 | &mov($a,&wparam(1)); # get a | ||
| 21 | &mov($b,&wparam(2)); # get b | ||
| 22 | &mov($num,&wparam(3)); # get num | ||
| 23 | &xor($c,$c); # clear carry | ||
| 24 | &and($num,0xfffffff8); # num rounded down to a multiple of 8 | ||
| 25 | |||
| 26 | &jz(&label("aw_finish")); | ||
| 27 | |||
| 28 | &set_label("aw_loop",0); | ||
| 29 | for ($i=0; $i<8; $i++) | ||
| 30 | { | ||
| 31 | &comment("Round $i"); | ||
| 32 | # c ends up as (carry out of a+c) + (carry out of +b); the mov between add and adc leaves the flags alone | ||
| 33 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
| 34 | &mov($tmp2,&DWP($i*4,$b,"",0)); # *b | ||
| 35 | &add($tmp1,$c); | ||
| 36 | &mov($c,0); | ||
| 37 | &adc($c,$c); | ||
| 38 | &add($tmp1,$tmp2); | ||
| 39 | &adc($c,0); | ||
| 40 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *r | ||
| 41 | } | ||
| 42 | |||
| 43 | &comment(""); | ||
| 44 | &add($a,32); | ||
| 45 | &add($b,32); | ||
| 46 | &add($r,32); | ||
| 47 | &sub($num,8); | ||
| 48 | &jnz(&label("aw_loop")); | ||
| 49 | |||
| 50 | &set_label("aw_finish",0); | ||
| 51 | &mov($num,&wparam(3)); # get num | ||
| 52 | &and($num,7); | ||
| 53 | &jz(&label("aw_end")); | ||
| 54 | # up to 7 tail words, unrolled; each round but the last counts num down and leaves when it reaches zero | ||
| 55 | for ($i=0; $i<7; $i++) | ||
| 56 | { | ||
| 57 | &comment("Tail Round $i"); | ||
| 58 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
| 59 | &mov($tmp2,&DWP($i*4,$b,"",0));# *b | ||
| 60 | &add($tmp1,$c); | ||
| 61 | &mov($c,0); | ||
| 62 | &adc($c,$c); | ||
| 63 | &add($tmp1,$tmp2); | ||
| 64 | &adc($c,0); | ||
| 65 | &dec($num) if ($i != 6); | ||
| 66 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *r | ||
| 67 | &jz(&label("aw_end")) if ($i != 6); | ||
| 68 | } | ||
| 69 | &set_label("aw_end",0); | ||
| 70 | |||
| 71 | # &mov("eax",$c); # $c is "eax" | ||
| 72 | |||
| 73 | &function_end($name); | ||
| 74 | } | ||
| 75 | |||
| 76 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl new file mode 100644 index 0000000000..2291253629 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/comba.pl | |||
| @@ -0,0 +1,277 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assembler | ||
| 3 | |||
| 4 | sub mul_add_c # emit one multiply-accumulate step: (c0,c1,c2) += eax*edx, optionally storing c0 and loading the next operands | ||
| 5 | { | ||
| 6 | local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; | ||
| 7 | |||
| 8 | # pos == -1 if eax and edx are pre-loaded, 0 to load from next | ||
| 9 | # words, and 1 if load return value | ||
| 10 | |||
| 11 | &comment("mul a[$ai]*b[$bi]"); | ||
| 12 | |||
| 13 | # "eax" and "edx" will always be pre-loaded. | ||
| 14 | # &mov("eax",&DWP($ai*4,$a,"",0)) ; | ||
| 15 | # &mov("edx",&DWP($bi*4,$b,"",0)); | ||
| 16 | |||
| 17 | &mul("edx"); | ||
| 18 | &add($c0,"eax"); | ||
| 19 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a | ||
| 20 | &mov("eax",&wparam(0)) if $pos > 0; # load r[] | ||
| 21 | ### | ||
| 22 | &adc($c1,"edx"); | ||
| 23 | &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b | ||
| 24 | &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b | ||
| 25 | ### | ||
| 26 | &adc($c2,0); | ||
| 27 | # if pos > 1, it means it is the last loop | ||
| 28 | &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; | ||
| 29 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a | ||
| 30 | } | ||
| 31 | |||
| 32 | sub sqr_add_c # emit one squaring step where the product is added once: (c0,c1,c2) += eax*eax (ai == bi) or eax*edx | ||
| 33 | { | ||
| 34 | local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; | ||
| 35 | |||
| 36 | # pos == -1 if eax and edx are pre-loaded, 0 to load from next | ||
| 37 | # words, and 1 if load return value | ||
| 38 | |||
| 39 | &comment("sqr a[$ai]*a[$bi]"); | ||
| 40 | |||
| 41 | # "eax" and "edx" will always be pre-loaded. | ||
| 42 | # &mov("eax",&DWP($ai*4,$a,"",0)) ; | ||
| 43 | # &mov("edx",&DWP($bi*4,$b,"",0)); | ||
| 44 | |||
| 45 | if ($ai == $bi) | ||
| 46 | { &mul("eax");} | ||
| 47 | else | ||
| 48 | { &mul("edx");} | ||
| 49 | &add($c0,"eax"); | ||
| 50 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a | ||
| 51 | ### | ||
| 52 | &adc($c1,"edx"); | ||
| 53 | &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); # second operand of the next step, also read from a[] | ||
| 54 | ### | ||
| 55 | &adc($c2,0); | ||
| 56 | # if pos > 1, it means it is the last loop | ||
| 57 | &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; | ||
| 58 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a | ||
| 59 | } | ||
| 60 | |||
| 61 | sub sqr_add_c2 # emit one squaring step where the product is added twice: (c0,c1,c2) += 2*eax*edx | ||
| 62 | { | ||
| 63 | local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; | ||
| 64 | |||
| 65 | # pos == -1 if eax and edx are pre-loaded, 0 to load from next | ||
| 66 | # words, and 1 if load return value | ||
| 67 | |||
| 68 | &comment("sqr a[$ai]*a[$bi]"); | ||
| 69 | |||
| 70 | # "eax" and "edx" will always be pre-loaded. | ||
| 71 | # &mov("eax",&DWP($ai*4,$a,"",0)) ; | ||
| 72 | # &mov("edx",&DWP($bi*4,$a,"",0)); | ||
| 73 | |||
| 74 | if ($ai == $bi) | ||
| 75 | { &mul("eax");} | ||
| 76 | else | ||
| 77 | { &mul("edx");} | ||
| 78 | &add("eax","eax"); # double the product: low word | ||
| 79 | ### | ||
| 80 | &adc("edx","edx"); # double the high word, taking in the carry from the low word | ||
| 81 | ### | ||
| 82 | &adc($c2,0); # the bit doubled out of edx goes into c2 | ||
| 83 | &add($c0,"eax"); | ||
| 84 | &adc($c1,"edx"); | ||
| 85 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a | ||
| 86 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a | ||
| 87 | &adc($c2,0); | ||
| 88 | &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; | ||
| 89 | &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb); | ||
| 90 | ### | ||
| 91 | } | ||
| 92 | |||
| 93 | sub bn_mul_comba # emit a fully unrolled $num x $num word comba multiply named $name | ||
| 94 | { | ||
| 95 | local($name,$num)=@_; | ||
| 96 | local($a,$b,$c0,$c1,$c2); | ||
| 97 | local($i,$as,$ae,$bs,$be,$ai,$bi); | ||
| 98 | local($tot,$end); | ||
| 99 | # a and b are read from parameters 1 and 2; the result pointer (parameter 0) is reloaded into eax by mul_add_c each time a word is stored | ||
| 100 | &function_begin_B($name,""); | ||
| 101 | |||
| 102 | $c0="ebx"; | ||
| 103 | $c1="ecx"; | ||
| 104 | $c2="ebp"; | ||
| 105 | $a="esi"; | ||
| 106 | $b="edi"; | ||
| 107 | # $as/$bs are the a and b indices of the first product in the current column, $be the b index of its last product | ||
| 108 | $as=0; | ||
| 109 | $ae=0; | ||
| 110 | $bs=0; | ||
| 111 | $be=0; | ||
| 112 | $tot=$num+$num-1; | ||
| 113 | |||
| 114 | &push("esi"); | ||
| 115 | &mov($a,&wparam(1)); | ||
| 116 | &push("edi"); | ||
| 117 | &mov($b,&wparam(2)); | ||
| 118 | &push("ebp"); | ||
| 119 | &push("ebx"); | ||
| 120 | |||
| 121 | &xor($c0,$c0); | ||
| 122 | &mov("eax",&DWP(0,$a,"",0)); # load the first word of a | ||
| 123 | &xor($c1,$c1); | ||
| 124 | &mov("edx",&DWP(0,$b,"",0)); # load the first word of b | ||
| 125 | # one outer pass per result column i; the inner loop walks the products a[ai]*b[bi] of that column | ||
| 126 | for ($i=0; $i<$tot; $i++) | ||
| 127 | { | ||
| 128 | $ai=$as; | ||
| 129 | $bi=$bs; | ||
| 130 | $end=$be+1; | ||
| 131 | |||
| 132 | &comment("################## Calculate word $i"); | ||
| 133 | |||
| 134 | for ($j=$bs; $j<$end; $j++) | ||
| 135 | { | ||
| 136 | &xor($c2,$c2) if ($j == $bs); | ||
| 137 | if (($j+1) == $end) | ||
| 138 | { | ||
| 139 | $v=1; | ||
| 140 | $v=2 if (($i+1) == $tot); | ||
| 141 | } | ||
| 142 | else | ||
| 143 | { $v=0; } | ||
| 144 | if (($j+1) != $end) | ||
| 145 | { | ||
| 146 | $na=($ai-1); | ||
| 147 | $nb=($bi+1); | ||
| 148 | } | ||
| 149 | else | ||
| 150 | { | ||
| 151 | $na=$as+($i < ($num-1)); | ||
| 152 | $nb=$bs+($i >= ($num-1)); | ||
| 153 | } | ||
| 154 | #printf STDERR "[$ai,$bi] -> [$na,$nb]\n"; | ||
| 155 | &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb); | ||
| 156 | if ($v) | ||
| 157 | { | ||
| 158 | &comment("saved r[$i]"); | ||
| 159 | # &mov("eax",&wparam(0)); | ||
| 160 | # &mov(&DWP($i*4,"eax","",0),$c0); | ||
| 161 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 162 | } | ||
| 163 | $ai--; | ||
| 164 | $bi++; | ||
| 165 | } | ||
| 166 | $as++ if ($i < ($num-1)); | ||
| 167 | $ae++ if ($i >= ($num-1)); | ||
| 168 | |||
| 169 | $bs++ if ($i >= ($num-1)); | ||
| 170 | $be++ if ($i < ($num-1)); | ||
| 171 | } | ||
| 172 | &comment("save r[$i]"); | ||
| 173 | # &mov("eax",&wparam(0)); | ||
| 174 | &mov(&DWP($i*4,"eax","",0),$c0); # top word r[2*num-1]; eax still holds r from the last mul_add_c (pos == 2) | ||
| 175 | |||
| 176 | &pop("ebx"); | ||
| 177 | &pop("ebp"); | ||
| 178 | &pop("edi"); | ||
| 179 | &pop("esi"); | ||
| 180 | &ret(); | ||
| 181 | &function_end_B($name); | ||
| 182 | } | ||
| 183 | |||
| 184 | sub bn_sqr_comba # emit a fully unrolled $num-word comba squaring routine named $name | ||
| 185 | { | ||
| 186 | local($name,$num)=@_; | ||
| 187 | local($r,$a,$c0,$c1,$c2); | ||
| 188 | local($i,$as,$ae,$bs,$be,$ai,$bi); | ||
| 189 | local($b,$tot,$end,$half); | ||
| 190 | # the result pointer is read from parameter 0 into edi and the input pointer from parameter 1 into esi | ||
| 191 | &function_begin_B($name,""); | ||
| 192 | |||
| 193 | $c0="ebx"; | ||
| 194 | $c1="ecx"; | ||
| 195 | $c2="ebp"; | ||
| 196 | $a="esi"; | ||
| 197 | $r="edi"; | ||
| 198 | |||
| 199 | &push("esi"); | ||
| 200 | &push("edi"); | ||
| 201 | &push("ebp"); | ||
| 202 | &push("ebx"); | ||
| 203 | &mov($r,&wparam(0)); | ||
| 204 | &mov($a,&wparam(1)); | ||
| 205 | &xor($c0,$c0); | ||
| 206 | &xor($c1,$c1); | ||
| 207 | &mov("eax",&DWP(0,$a,"",0)); # load the first word | ||
| 208 | # $as/$bs are the two a[] indices of the first product in the current column | ||
| 209 | $as=0; | ||
| 210 | $ae=0; | ||
| 211 | $bs=0; | ||
| 212 | $be=0; | ||
| 213 | $tot=$num+$num-1; | ||
| 214 | # one outer pass per result column i; the inner loop stops (last) once the index pair would cross, since cross products are added twice by sqr_add_c2 | ||
| 215 | for ($i=0; $i<$tot; $i++) | ||
| 216 | { | ||
| 217 | $ai=$as; | ||
| 218 | $bi=$bs; | ||
| 219 | $end=$be+1; | ||
| 220 | |||
| 221 | &comment("############### Calculate word $i"); | ||
| 222 | for ($j=$bs; $j<$end; $j++) | ||
| 223 | { | ||
| 224 | &xor($c2,$c2) if ($j == $bs); | ||
| 225 | if (($ai-1) < ($bi+1)) | ||
| 226 | { | ||
| 227 | $v=1; | ||
| 228 | $v=2 if ($i+1) == $tot; | ||
| 229 | } | ||
| 230 | else | ||
| 231 | { $v=0; } | ||
| 232 | if (!$v) | ||
| 233 | { | ||
| 234 | $na=$ai-1; | ||
| 235 | $nb=$bi+1; | ||
| 236 | } | ||
| 237 | else | ||
| 238 | { | ||
| 239 | $na=$as+($i < ($num-1)); | ||
| 240 | $nb=$bs+($i >= ($num-1)); | ||
| 241 | } | ||
| 242 | if ($ai == $bi) | ||
| 243 | { | ||
| 244 | &sqr_add_c($r,$a,$ai,$bi, | ||
| 245 | $c0,$c1,$c2,$v,$i,$na,$nb); | ||
| 246 | } | ||
| 247 | else | ||
| 248 | { | ||
| 249 | &sqr_add_c2($r,$a,$ai,$bi, | ||
| 250 | $c0,$c1,$c2,$v,$i,$na,$nb); | ||
| 251 | } | ||
| 252 | if ($v) | ||
| 253 | { | ||
| 254 | &comment("saved r[$i]"); | ||
| 255 | #&mov(&DWP($i*4,$r,"",0),$c0); | ||
| 256 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
| 257 | last; | ||
| 258 | } | ||
| 259 | $ai--; | ||
| 260 | $bi++; | ||
| 261 | } | ||
| 262 | $as++ if ($i < ($num-1)); | ||
| 263 | $ae++ if ($i >= ($num-1)); | ||
| 264 | |||
| 265 | $bs++ if ($i >= ($num-1)); | ||
| 266 | $be++ if ($i < ($num-1)); | ||
| 267 | } | ||
| 268 | &mov(&DWP($i*4,$r,"",0),$c0); # top word r[2*num-1] | ||
| 269 | &pop("ebx"); | ||
| 270 | &pop("ebp"); | ||
| 271 | &pop("edi"); | ||
| 272 | &pop("esi"); | ||
| 273 | &ret(); | ||
| 274 | &function_end_B($name); | ||
| 275 | } | ||
| 276 | |||
| 277 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/div.pl b/src/lib/libcrypto/bn/asm/x86/div.pl new file mode 100644 index 0000000000..0e90152caa --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/div.pl | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assembler | ||
| 3 | |||
| 4 | sub bn_div_words # emit a routine that divides the two-word value param0:param1 by param2 with a single div | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | # NOTE(review): x86 div raises a divide error when the quotient does not fit in 32 bits (param0 >= param2); callers presumably guarantee this - confirm | ||
| 8 | &function_begin($name,""); | ||
| 9 | &mov("edx",&wparam(0)); # high word of the dividend | ||
| 10 | &mov("eax",&wparam(1)); # low word of the dividend | ||
| 11 | &mov("ebx",&wparam(2)); # divisor | ||
| 12 | &div("ebx"); # edx:eax / ebx; the quotient is left in eax | ||
| 13 | &function_end($name); | ||
| 14 | } | ||
| 15 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/f b/src/lib/libcrypto/bn/asm/x86/f new file mode 100644 index 0000000000..22e4112224 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/f | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assembler | ||
| 3 | |||
diff --git a/src/lib/libcrypto/bn/asm/x86/mul.pl b/src/lib/libcrypto/bn/asm/x86/mul.pl new file mode 100644 index 0000000000..674cb9b055 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/mul.pl | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assembler | ||
| 3 | |||
| 4 | sub bn_mul_words # emit r[i] = low(a[i]*w + c), c = high word, over num words; the final c is moved to eax | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | |||
| 10 | &comment(""); | ||
| 11 | $Low="eax"; | ||
| 12 | $High="edx"; | ||
| 13 | $a="ebx"; | ||
| 14 | $w="ecx"; | ||
| 15 | $r="edi"; | ||
| 16 | $c="esi"; | ||
| 17 | $num="ebp"; | ||
| 18 | |||
| 19 | &xor($c,$c); # clear carry | ||
| 20 | &mov($r,&wparam(0)); # get r | ||
| 21 | &mov($a,&wparam(1)); # get a | ||
| 22 | &mov($num,&wparam(2)); # get num | ||
| 23 | &mov($w,&wparam(3)); # get w | ||
| 24 | |||
| 25 | &and($num,0xfffffff8); # num rounded down to a multiple of 8 | ||
| 26 | &jz(&label("mw_finish")); | ||
| 27 | |||
| 28 | &set_label("mw_loop",0); | ||
| 29 | for ($i=0; $i<32; $i+=4) | ||
| 30 | { | ||
| 31 | &comment("Round $i"); | ||
| 32 | |||
| 33 | &mov("eax",&DWP($i,$a,"",0)); # *a | ||
| 34 | &mul($w); # *a * w | ||
| 35 | &add("eax",$c); # L(t)+=c | ||
| 36 | # XXX | ||
| 37 | |||
| 38 | &adc("edx",0); # H(t)+=carry | ||
| 39 | &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); | ||
| 40 | |||
| 41 | &mov($c,"edx"); # c= H(t); | ||
| 42 | } | ||
| 43 | |||
| 44 | &comment(""); | ||
| 45 | &add($a,32); | ||
| 46 | &add($r,32); | ||
| 47 | &sub($num,8); | ||
| 48 | &jz(&label("mw_finish")); | ||
| 49 | &jmp(&label("mw_loop")); | ||
| 50 | |||
| 51 | &set_label("mw_finish",0); | ||
| 52 | &mov($num,&wparam(2)); # get num | ||
| 53 | &and($num,7); | ||
| 54 | &jnz(&label("mw_finish2")); | ||
| 55 | &jmp(&label("mw_end")); | ||
| 56 | |||
| 57 | &set_label("mw_finish2",1); | ||
| 58 | for ($i=0; $i<7; $i++) | ||
| 59 | { | ||
| 60 | &comment("Tail Round $i"); | ||
| 61 | &mov("eax",&DWP($i*4,$a,"",0));# *a | ||
| 62 | &mul($w); # *a * w | ||
| 63 | &add("eax",$c); # L(t)+=c | ||
| 64 | # XXX | ||
| 65 | &adc("edx",0); # H(t)+=carry | ||
| 66 | &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t); | ||
| 67 | &mov($c,"edx"); # c= H(t); | ||
| 68 | &dec($num) if ($i != 7-1); | ||
| 69 | &jz(&label("mw_end")) if ($i != 7-1); | ||
| 70 | } | ||
| 71 | &set_label("mw_end",0); | ||
| 72 | &mov("eax",$c); | ||
| 73 | |||
| 74 | &function_end($name); | ||
| 75 | } | ||
| 76 | |||
| 77 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/mul_add.pl b/src/lib/libcrypto/bn/asm/x86/mul_add.pl new file mode 100644 index 0000000000..61830d3a90 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/mul_add.pl | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assembler | ||
| 3 | |||
| 4 | sub bn_mul_add_words # emit r[i] = low(a[i]*w + r[i] + c), c = high word, over num words; the final c is moved to eax | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | |||
| 10 | &comment(""); | ||
| 11 | $Low="eax"; | ||
| 12 | $High="edx"; | ||
| 13 | $a="ebx"; | ||
| 14 | $w="ebp"; | ||
| 15 | $r="edi"; | ||
| 16 | $c="esi"; | ||
| 17 | |||
| 18 | &xor($c,$c); # clear carry | ||
| 19 | &mov($r,&wparam(0)); # get r | ||
| 20 | |||
| 21 | &mov("ecx",&wparam(2)); # get num | ||
| 22 | &mov($a,&wparam(1)); # get a | ||
| 23 | |||
| 24 | &and("ecx",0xfffffff8); # num rounded down to a multiple of 8 | ||
| 25 | &mov($w,&wparam(3)); # get w | ||
| 26 | |||
| 27 | &push("ecx"); # Up the stack for a tmp variable | ||
| 28 | # the jz below still tests the result of the and above: the mov and push in between leave the flags alone | ||
| 29 | &jz(&label("maw_finish")); | ||
| 30 | |||
| 31 | &set_label("maw_loop",0); | ||
| 32 | |||
| 33 | &mov(&swtmp(0),"ecx"); # park the loop counter in the tmp slot (swtmp is defined elsewhere; presumably the word just pushed) | ||
| 34 | |||
| 35 | for ($i=0; $i<32; $i+=4) | ||
| 36 | { | ||
| 37 | &comment("Round $i"); | ||
| 38 | |||
| 39 | &mov("eax",&DWP($i,$a,"",0)); # *a | ||
| 40 | &mul($w); # *a * w | ||
| 41 | &add("eax",$c); # L(t)+=c | ||
| 42 | &mov($c,&DWP($i,$r,"",0)); # load *r | ||
| 43 | &adc("edx",0); # H(t)+=carry | ||
| 44 | &add("eax",$c); # L(t)+= *r | ||
| 45 | &adc("edx",0); # H(t)+=carry | ||
| 46 | &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); | ||
| 47 | &mov($c,"edx"); # c= H(t); | ||
| 48 | } | ||
| 49 | |||
| 50 | &comment(""); | ||
| 51 | &mov("ecx",&swtmp(0)); # reload the loop counter | ||
| 52 | &add($a,32); | ||
| 53 | &add($r,32); | ||
| 54 | &sub("ecx",8); | ||
| 55 | &jnz(&label("maw_loop")); | ||
| 56 | |||
| 57 | &set_label("maw_finish",0); | ||
| 58 | &mov("ecx",&wparam(2)); # get num | ||
| 59 | &and("ecx",7); | ||
| 60 | &jnz(&label("maw_finish2")); # helps branch prediction | ||
| 61 | &jmp(&label("maw_end")); | ||
| 62 | |||
| 63 | &set_label("maw_finish2",1); | ||
| 64 | for ($i=0; $i<7; $i++) | ||
| 65 | { | ||
| 66 | &comment("Tail Round $i"); | ||
| 67 | &mov("eax",&DWP($i*4,$a,"",0));# *a | ||
| 68 | &mul($w); # *a * w | ||
| 69 | &add("eax",$c); # L(t)+=c | ||
| 70 | &mov($c,&DWP($i*4,$r,"",0)); # load *r | ||
| 71 | &adc("edx",0); # H(t)+=carry | ||
| 72 | &add("eax",$c); # L(t)+= *r | ||
| 73 | &adc("edx",0); # H(t)+=carry | ||
| 74 | &dec("ecx") if ($i != 7-1); | ||
| 75 | &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); | ||
| 76 | &mov($c,"edx"); # c= H(t); | ||
| 77 | &jz(&label("maw_end")) if ($i != 7-1); | ||
| 78 | } | ||
| 79 | &set_label("maw_end",0); | ||
| 80 | &mov("eax",$c); | ||
| 81 | |||
| 82 | &pop("ecx"); # pop the tmp variable off the stack | ||
| 83 | |||
| 84 | &function_end($name); | ||
| 85 | } | ||
| 86 | |||
| 87 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/sqr.pl b/src/lib/libcrypto/bn/asm/x86/sqr.pl new file mode 100644 index 0000000000..1f90993cf6 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/sqr.pl | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assembler | ||
| 3 | |||
| 4 | sub bn_sqr_words # emit per-word squaring: r[2*i], r[2*i+1] = low and high words of a[i]*a[i], over num words | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | |||
| 10 | &comment(""); | ||
| 11 | $r="esi"; | ||
| 12 | $a="edi"; | ||
| 13 | $num="ebx"; | ||
| 14 | |||
| 15 | &mov($r,&wparam(0)); # get r | ||
| 16 | &mov($a,&wparam(1)); # get a | ||
| 17 | &mov($num,&wparam(2)); # get num | ||
| 18 | |||
| 19 | &and($num,0xfffffff8); # num rounded down to a multiple of 8 | ||
| 20 | &jz(&label("sw_finish")); | ||
| 21 | |||
| 22 | &set_label("sw_loop",0); | ||
| 23 | for ($i=0; $i<32; $i+=4) | ||
| 24 | { | ||
| 25 | &comment("Round $i"); | ||
| 26 | &mov("eax",&DWP($i,$a,"",0)); # *a | ||
| 27 | # XXX | ||
| 28 | &mul("eax"); # *a * *a | ||
| 29 | &mov(&DWP($i*2,$r,"",0),"eax"); # low word of the square | ||
| 30 | &mov(&DWP($i*2+4,$r,"",0),"edx");# high word of the square | ||
| 31 | } | ||
| 32 | |||
| 33 | &comment(""); | ||
| 34 | &add($a,32); | ||
| 35 | &add($r,64); | ||
| 36 | &sub($num,8); | ||
| 37 | &jnz(&label("sw_loop")); | ||
| 38 | |||
| 39 | &set_label("sw_finish",0); | ||
| 40 | &mov($num,&wparam(2)); # get num | ||
| 41 | &and($num,7); | ||
| 42 | &jz(&label("sw_end")); | ||
| 43 | |||
| 44 | for ($i=0; $i<7; $i++) | ||
| 45 | { | ||
| 46 | &comment("Tail Round $i"); | ||
| 47 | &mov("eax",&DWP($i*4,$a,"",0)); # *a | ||
| 48 | # XXX | ||
| 49 | &mul("eax"); # *a * *a | ||
| 50 | &mov(&DWP($i*8,$r,"",0),"eax"); # low word of the square | ||
| 51 | &dec($num) if ($i != 7-1); | ||
| 52 | &mov(&DWP($i*8+4,$r,"",0),"edx"); | ||
| 53 | &jz(&label("sw_end")) if ($i != 7-1); | ||
| 54 | } | ||
| 55 | &set_label("sw_end",0); | ||
| 56 | |||
| 57 | &function_end($name); | ||
| 58 | } | ||
| 59 | |||
| 60 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/sub.pl b/src/lib/libcrypto/bn/asm/x86/sub.pl new file mode 100644 index 0000000000..837b0e1b07 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/sub.pl | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # x86 assembler | ||
| 3 | |||
| 4 | sub bn_sub_words # emit the word-array subtract r[i] = a[i] - b[i] with borrow propagation; the final borrow is left in eax | ||
| 5 | { | ||
| 6 | local($name)=@_; | ||
| 7 | |||
| 8 | &function_begin($name,""); | ||
| 9 | |||
| 10 | &comment(""); | ||
| 11 | $a="esi"; | ||
| 12 | $b="edi"; | ||
| 13 | $c="eax"; | ||
| 14 | $r="ebx"; | ||
| 15 | $tmp1="ecx"; | ||
| 16 | $tmp2="edx"; | ||
| 17 | $num="ebp"; | ||
| 18 | |||
| 19 | &mov($r,&wparam(0)); # get r | ||
| 20 | &mov($a,&wparam(1)); # get a | ||
| 21 | &mov($b,&wparam(2)); # get b | ||
| 22 | &mov($num,&wparam(3)); # get num | ||
| 23 | &xor($c,$c); # clear borrow | ||
| 24 | &and($num,0xfffffff8); # num rounded down to a multiple of 8 | ||
| 25 | |||
| 26 | &jz(&label("aw_finish")); | ||
| 27 | |||
| 28 | &set_label("aw_loop",0); | ||
| 29 | for ($i=0; $i<8; $i++) | ||
| 30 | { | ||
| 31 | &comment("Round $i"); | ||
| 32 | # c ends up as (borrow out of a-c) + (borrow out of -b); sub reports a borrow in the carry flag and the mov leaves the flags alone | ||
| 33 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
| 34 | &mov($tmp2,&DWP($i*4,$b,"",0)); # *b | ||
| 35 | &sub($tmp1,$c); | ||
| 36 | &mov($c,0); | ||
| 37 | &adc($c,$c); | ||
| 38 | &sub($tmp1,$tmp2); | ||
| 39 | &adc($c,0); | ||
| 40 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *r | ||
| 41 | } | ||
| 42 | |||
| 43 | &comment(""); | ||
| 44 | &add($a,32); | ||
| 45 | &add($b,32); | ||
| 46 | &add($r,32); | ||
| 47 | &sub($num,8); | ||
| 48 | &jnz(&label("aw_loop")); | ||
| 49 | |||
| 50 | &set_label("aw_finish",0); | ||
| 51 | &mov($num,&wparam(3)); # get num | ||
| 52 | &and($num,7); | ||
| 53 | &jz(&label("aw_end")); | ||
| 54 | # up to 7 tail words, unrolled; each round but the last counts num down and leaves when it reaches zero | ||
| 55 | for ($i=0; $i<7; $i++) | ||
| 56 | { | ||
| 57 | &comment("Tail Round $i"); | ||
| 58 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
| 59 | &mov($tmp2,&DWP($i*4,$b,"",0));# *b | ||
| 60 | &sub($tmp1,$c); | ||
| 61 | &mov($c,0); | ||
| 62 | &adc($c,$c); | ||
| 63 | &sub($tmp1,$tmp2); | ||
| 64 | &adc($c,0); | ||
| 65 | &dec($num) if ($i != 6); | ||
| 66 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *r | ||
| 67 | &jz(&label("aw_end")) if ($i != 6); | ||
| 68 | } | ||
| 69 | &set_label("aw_end",0); | ||
| 70 | |||
| 71 | # &mov("eax",$c); # $c is "eax" | ||
| 72 | |||
| 73 | &function_end($name); | ||
| 74 | } | ||
| 75 | |||
| 76 | 1; | ||
diff --git a/src/lib/libcrypto/bn/old/b_sqr.c b/src/lib/libcrypto/bn/old/b_sqr.c new file mode 100644 index 0000000000..715cb1c8ab --- /dev/null +++ b/src/lib/libcrypto/bn/old/b_sqr.c | |||
| @@ -0,0 +1,199 @@ | |||
| 1 | /* crypto/bn/bn_mul.c */ | ||
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
| 3 | * All rights reserved. | ||
| 4 | * | ||
| 5 | * This package is an SSL implementation written | ||
| 6 | * by Eric Young (eay@cryptsoft.com). | ||
| 7 | * The implementation was written so as to conform with Netscapes SSL. | ||
| 8 | * | ||
| 9 | * This library is free for commercial and non-commercial use as long as | ||
| 10 | * the following conditions are aheared to. The following conditions | ||
| 11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
| 12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
| 13 | * included with this distribution is covered by the same copyright terms | ||
| 14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
| 15 | * | ||
| 16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
| 17 | * the code are not to be removed. | ||
| 18 | * If this package is used in a product, Eric Young should be given attribution | ||
| 19 | * as the author of the parts of the library used. | ||
| 20 | * This can be in the form of a textual message at program startup or | ||
| 21 | * in documentation (online or textual) provided with the package. | ||
| 22 | * | ||
| 23 | * Redistribution and use in source and binary forms, with or without | ||
| 24 | * modification, are permitted provided that the following conditions | ||
| 25 | * are met: | ||
| 26 | * 1. Redistributions of source code must retain the copyright | ||
| 27 | * notice, this list of conditions and the following disclaimer. | ||
| 28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 29 | * notice, this list of conditions and the following disclaimer in the | ||
| 30 | * documentation and/or other materials provided with the distribution. | ||
| 31 | * 3. All advertising materials mentioning features or use of this software | ||
| 32 | * must display the following acknowledgement: | ||
| 33 | * "This product includes cryptographic software written by | ||
| 34 | * Eric Young (eay@cryptsoft.com)" | ||
| 35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
| 36 | * being used are not cryptographic related :-). | ||
| 37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
| 38 | * the apps directory (application code) you must include an acknowledgement: | ||
| 39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
| 40 | * | ||
| 41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
| 42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
| 45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
| 47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
| 49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
| 50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| 51 | * SUCH DAMAGE. | ||
| 52 | * | ||
| 53 | * The licence and distribution terms for any publically available version or | ||
| 54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
| 55 | * copied and put under another distribution licence | ||
| 56 | * [including the GNU Public Licence.] | ||
| 57 | */ | ||
| 58 | |||
| 59 | #include <stdio.h> | ||
| 60 | #include "cryptlib.h" | ||
| 61 | #include "bn_lcl.h" | ||
| 62 | |||
| 63 | static int bn_mm(BIGNUM *m,BIGNUM *A,BIGNUM *B, BIGNUM *sk,BN_CTX *ctx); | ||
| 64 | |||
| 65 | /* r must be different to a and b */ | ||
| 66 | /* int BN_mmul(r, a, b) */ | ||
| 67 | int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b) | ||
| 68 | { | ||
| 69 | BN_ULONG *ap,*bp,*rp; | ||
| 70 | BIGNUM *sk; | ||
| 71 | int i,n,ret; | ||
| 72 | int max,al,bl; | ||
| 73 | BN_CTX ctx; | ||
| 74 | |||
| 75 | bn_check_top(a); | ||
| 76 | bn_check_top(b); | ||
| 77 | |||
| 78 | al=a->top; | ||
| 79 | bl=b->top; | ||
| 80 | if ((al == 0) || (bl == 0)) | ||
| 81 | { | ||
| 82 | r->top=0; | ||
| 83 | return(1); | ||
| 84 | } | ||
| 85 | #ifdef BN_MUL_DEBUG | ||
| 86 | printf("BN_mul(%d,%d)\n",a->top,b->top); | ||
| 87 | #endif | ||
| 88 | |||
| 89 | if ( (bn_limit_bits > 0) && | ||
| 90 | (bl > bn_limit_num) && (al > bn_limit_num)) | ||
| 91 | { | ||
| 92 | n=(BN_num_bits_word(al|bl)-bn_limit_bits); | ||
| 93 | n*=2; | ||
| 94 | sk=(BIGNUM *)Malloc(sizeof(BIGNUM)*n); | ||
| 95 | memset(sk,0,sizeof(BIGNUM)*n); | ||
| 96 | memset(&ctx,0,sizeof(ctx)); | ||
| 97 | |||
| 98 | ret=bn_mm(r,a,b,&(sk[0]),&ctx); | ||
| 99 | for (i=0; i<n; i+=2) | ||
| 100 | { | ||
| 101 | BN_clear_free(&sk[i]); | ||
| 102 | BN_clear_free(&sk[i+1]); | ||
| 103 | } | ||
| 104 | Free(sk); | ||
| 105 | return(ret); | ||
| 106 | } | ||
| 107 | |||
| 108 | max=(al+bl); | ||
| 109 | if (bn_wexpand(r,max) == NULL) return(0); | ||
| 110 | r->top=max; | ||
| 111 | r->neg=a->neg^b->neg; | ||
| 112 | ap=a->d; | ||
| 113 | bp=b->d; | ||
| 114 | rp=r->d; | ||
| 115 | |||
| 116 | rp[al]=bn_mul_words(rp,ap,al,*(bp++)); | ||
| 117 | rp++; | ||
| 118 | for (i=1; i<bl; i++) | ||
| 119 | { | ||
| 120 | rp[al]=bn_mul_add_words(rp,ap,al,*(bp++)); | ||
| 121 | rp++; | ||
| 122 | } | ||
| 123 | if ((max > 0) && (r->d[max-1] == 0)) r->top--; | ||
| 124 | return(1); | ||
| 125 | } | ||
| 126 | |||
| 127 | |||
| 128 | #define ahal (sk[0]) | ||
| 129 | #define blbh (sk[1]) | ||
| 130 | |||
| 131 | /* r must be different to a and b */ | ||
| 132 | int bn_mm(BIGNUM *m, BIGNUM *A, BIGNUM *B, BIGNUM *sk, BN_CTX *ctx) | ||
| 133 | { | ||
| 134 | int n,num,sqr=0; | ||
| 135 | int an,bn; | ||
| 136 | BIGNUM ah,al,bh,bl; | ||
| 137 | |||
| 138 | an=A->top; | ||
| 139 | bn=B->top; | ||
| 140 | #ifdef BN_MUL_DEBUG | ||
| 141 | printf("bn_mm(%d,%d)\n",A->top,B->top); | ||
| 142 | #endif | ||
| 143 | |||
| 144 | if (A == B) sqr=1; | ||
| 145 | num=(an>bn)?an:bn; | ||
| 146 | n=(num+1)/2; | ||
| 147 | /* Are going to now chop things into 'num' word chunks. */ | ||
| 148 | |||
| 149 | BN_init(&ah); | ||
| 150 | BN_init(&al); | ||
| 151 | BN_init(&bh); | ||
| 152 | BN_init(&bl); | ||
| 153 | |||
| 154 | bn_set_low (&al,A,n); | ||
| 155 | bn_set_high(&ah,A,n); | ||
| 156 | bn_set_low (&bl,B,n); | ||
| 157 | bn_set_high(&bh,B,n); | ||
| 158 | |||
| 159 | BN_sub(&ahal,&ah,&al); | ||
| 160 | BN_sub(&blbh,&bl,&bh); | ||
| 161 | |||
| 162 | if (num <= (bn_limit_num+bn_limit_num)) | ||
| 163 | { | ||
| 164 | BN_mul(m,&ahal,&blbh); | ||
| 165 | if (sqr) | ||
| 166 | { | ||
| 167 | BN_sqr(&ahal,&al,ctx); | ||
| 168 | BN_sqr(&blbh,&ah,ctx); | ||
| 169 | } | ||
| 170 | else | ||
| 171 | { | ||
| 172 | BN_mul(&ahal,&al,&bl); | ||
| 173 | BN_mul(&blbh,&ah,&bh); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | else | ||
| 177 | { | ||
| 178 | bn_mm(m,&ahal,&blbh,&(sk[2]),ctx); | ||
| 179 | bn_mm(&ahal,&al,&bl,&(sk[2]),ctx); | ||
| 180 | bn_mm(&blbh,&ah,&bh,&(sk[2]),ctx); | ||
| 181 | } | ||
| 182 | |||
| 183 | BN_add(m,m,&ahal); | ||
| 184 | BN_add(m,m,&blbh); | ||
| 185 | |||
| 186 | BN_lshift(m,m,n*BN_BITS2); | ||
| 187 | BN_lshift(&blbh,&blbh,n*BN_BITS2*2); | ||
| 188 | |||
| 189 | BN_add(m,m,&ahal); | ||
| 190 | BN_add(m,m,&blbh); | ||
| 191 | |||
| 192 | m->neg=A->neg^B->neg; | ||
| 193 | return(1); | ||
| 194 | } | ||
| 195 | #undef ahal (sk[0]) | ||
| 196 | #undef blbh (sk[1]) | ||
| 197 | |||
| 198 | #include "bn_low.c" | ||
| 199 | #include "bn_high.c" | ||
diff --git a/src/lib/libcrypto/bn/old/bn_com.c b/src/lib/libcrypto/bn/old/bn_com.c new file mode 100644 index 0000000000..7666b2304c --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_com.c | |||
| @@ -0,0 +1,90 @@ | |||
| 1 | /* crypto/bn/bn_mulw.c */ | ||
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
| 3 | * All rights reserved. | ||
| 4 | * | ||
| 5 | * This package is an SSL implementation written | ||
| 6 | * by Eric Young (eay@cryptsoft.com). | ||
| 7 | * The implementation was written so as to conform with Netscapes SSL. | ||
| 8 | * | ||
| 9 | * This library is free for commercial and non-commercial use as long as | ||
| 10 | * the following conditions are aheared to. The following conditions | ||
| 11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
| 12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
| 13 | * included with this distribution is covered by the same copyright terms | ||
| 14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
| 15 | * | ||
| 16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
| 17 | * the code are not to be removed. | ||
| 18 | * If this package is used in a product, Eric Young should be given attribution | ||
| 19 | * as the author of the parts of the library used. | ||
| 20 | * This can be in the form of a textual message at program startup or | ||
| 21 | * in documentation (online or textual) provided with the package. | ||
| 22 | * | ||
| 23 | * Redistribution and use in source and binary forms, with or without | ||
| 24 | * modification, are permitted provided that the following conditions | ||
| 25 | * are met: | ||
| 26 | * 1. Redistributions of source code must retain the copyright | ||
| 27 | * notice, this list of conditions and the following disclaimer. | ||
| 28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 29 | * notice, this list of conditions and the following disclaimer in the | ||
| 30 | * documentation and/or other materials provided with the distribution. | ||
| 31 | * 3. All advertising materials mentioning features or use of this software | ||
| 32 | * must display the following acknowledgement: | ||
| 33 | * "This product includes cryptographic software written by | ||
| 34 | * Eric Young (eay@cryptsoft.com)" | ||
| 35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
| 36 | * being used are not cryptographic related :-). | ||
| 37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
| 38 | * the apps directory (application code) you must include an acknowledgement: | ||
| 39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
| 40 | * | ||
| 41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
| 42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
| 45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
| 47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
| 49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
| 50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| 51 | * SUCH DAMAGE. | ||
| 52 | * | ||
| 53 | * The licence and distribution terms for any publically available version or | ||
| 54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
| 55 | * copied and put under another distribution licence | ||
| 56 | * [including the GNU Public Licence.] | ||
| 57 | */ | ||
| 58 | |||
| 59 | #include <stdio.h> | ||
| 60 | #include "cryptlib.h" | ||
| 61 | #include "bn_lcl.h" | ||
| 62 | |||
| 63 | #ifdef BN_LLONG | ||
| 64 | |||
| 65 | ab | ||
| 66 | 12 | ||
| 67 | a2 b2 | ||
| 68 | a1 b1 | ||
| 69 | |||
| 70 | abc | ||
| 71 | 123 | ||
| 72 | a3 b3 c3 | ||
| 73 | a2 b2 c2 | ||
| 74 | a1 b1 c1 | ||
| 75 | |||
| 76 | abcd | ||
| 77 | 1234 | ||
| 78 | a4 b4 c4 d4 | ||
| 79 | a3 b3 c3 d3 | ||
| 80 | a2 b2 c2 d2 | ||
| 81 | a1 b1 c1 d1 | ||
| 82 | |||
| 83 | abcde | ||
| 84 | 01234 | ||
| 85 | a5 b5 c5 d5 e5 | ||
| 86 | a4 b4 c4 d4 e4 | ||
| 87 | a3 b3 c3 d3 e3 | ||
| 88 | a2 b2 c2 d2 e2 | ||
| 89 | a1 b1 c1 d1 e1 | ||
| 90 | a0 b0 c0 d0 e0 | ||
diff --git a/src/lib/libcrypto/bn/old/bn_high.c b/src/lib/libcrypto/bn/old/bn_high.c new file mode 100644 index 0000000000..763bcb605b --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_high.c | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | #include <stdio.h> | ||
| 2 | #include "cryptlib.h" | ||
| 3 | #include "bn_lcl.h" | ||
| 4 | |||
| 5 | #undef BN_MUL_HIGH_DEBUG | ||
| 6 | |||
| 7 | #ifdef BN_MUL_HIGH_DEBUG | ||
| 8 | #define debug_BN_print(a,b,c) BN_print_fp(a,b); printf(c); | ||
| 9 | #else | ||
| 10 | #define debug_BN_print(a,b,c) | ||
| 11 | #endif | ||
| 12 | |||
| 13 | int BN_mul_high(BIGNUM *r,BIGNUM *a,BIGNUM *b,BIGNUM *low, int words); | ||
| 14 | |||
| 15 | #undef t1 | ||
| 16 | #undef t2 | ||
| 17 | |||
| 18 | int BN_mul_high(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *low, int words) | ||
| 19 | { | ||
| 20 | int w2,borrow=0,full=0; | ||
| 21 | BIGNUM t1,t2,t3,h,ah,al,bh,bl,m,s0,s1; | ||
| 22 | BN_ULONG ul1,ul2; | ||
| 23 | |||
| 24 | BN_mul(r,a,b); | ||
| 25 | BN_rshift(r,r,words*BN_BITS2); | ||
| 26 | return(1); | ||
| 27 | |||
| 28 | w2=(words+1)/2; | ||
| 29 | |||
| 30 | #ifdef BN_MUL_HIGH_DEBUG | ||
| 31 | fprintf(stdout,"words=%d w2=%d\n",words,w2); | ||
| 32 | #endif | ||
| 33 | debug_BN_print(stdout,a," a\n"); | ||
| 34 | debug_BN_print(stdout,b," b\n"); | ||
| 35 | debug_BN_print(stdout,low," low\n"); | ||
| 36 | BN_init(&al); BN_init(&ah); | ||
| 37 | BN_init(&bl); BN_init(&bh); | ||
| 38 | BN_init(&t1); BN_init(&t2); BN_init(&t3); | ||
| 39 | BN_init(&s0); BN_init(&s1); | ||
| 40 | BN_init(&h); BN_init(&m); | ||
| 41 | |||
| 42 | bn_set_low (&al,a,w2); | ||
| 43 | bn_set_high(&ah,a,w2); | ||
| 44 | bn_set_low (&bl,b,w2); | ||
| 45 | bn_set_high(&bh,b,w2); | ||
| 46 | |||
| 47 | bn_set_low(&s0,low,w2); | ||
| 48 | bn_set_high(&s1,low,w2); | ||
| 49 | |||
| 50 | debug_BN_print(stdout,&al," al\n"); | ||
| 51 | debug_BN_print(stdout,&ah," ah\n"); | ||
| 52 | debug_BN_print(stdout,&bl," bl\n"); | ||
| 53 | debug_BN_print(stdout,&bh," bh\n"); | ||
| 54 | debug_BN_print(stdout,&s0," s0\n"); | ||
| 55 | debug_BN_print(stdout,&s1," s1\n"); | ||
| 56 | |||
| 57 | /* Calculate (al-ah)*(bh-bl) */ | ||
| 58 | BN_sub(&t1,&al,&ah); | ||
| 59 | BN_sub(&t2,&bh,&bl); | ||
| 60 | BN_mul(&m,&t1,&t2); | ||
| 61 | |||
| 62 | /* Calculate ah*bh */ | ||
| 63 | BN_mul(&h,&ah,&bh); | ||
| 64 | |||
| 65 | /* s0 == low(al*bl) | ||
| 66 | * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl) | ||
| 67 | * We know s0 and s1 so the only unknown is high(al*bl) | ||
| 68 | * high(al*bl) == s1 - low(ah*bh+(al-ah)*(bh-bl)+s0) | ||
| 69 | */ | ||
| 70 | BN_add(&m,&m,&h); | ||
| 71 | BN_add(&t2,&m,&s0); | ||
| 72 | |||
| 73 | debug_BN_print(stdout,&t2," middle value\n"); | ||
| 74 | |||
| 75 | /* Quick and dirty mask off of high words */ | ||
| 76 | if (w2 < t2.top) t2.top=w2; | ||
| 77 | #if 0 | ||
| 78 | bn_set_low(&t3,&t2,w2); | ||
| 79 | #endif | ||
| 80 | |||
| 81 | debug_BN_print(stdout,&t2," low middle value\n"); | ||
| 82 | BN_sub(&t1,&s1,&t2); | ||
| 83 | |||
| 84 | if (t1.neg) | ||
| 85 | { | ||
| 86 | debug_BN_print(stdout,&t1," before\n"); | ||
| 87 | BN_zero(&t2); | ||
| 88 | BN_set_bit(&t2,w2*BN_BITS2); | ||
| 89 | BN_add(&t1,&t2,&t1); | ||
| 90 | /* BN_mask_bits(&t1,w2*BN_BITS2); */ | ||
| 91 | /* if (words < t1.top) t1.top=words; */ | ||
| 92 | debug_BN_print(stdout,&t1," after\n"); | ||
| 93 | borrow=1; | ||
| 94 | } | ||
| 95 | |||
| 96 | /* XXXXX SPEED THIS UP */ | ||
| 97 | /* al*bl == high(al*bl)<<words+s0 */ | ||
| 98 | BN_lshift(&t1,&t1,w2*BN_BITS2); | ||
| 99 | BN_add(&t1,&t1,&s0); | ||
| 100 | if (w2*2 < t1.top) t1.top=w2*2; /* This should not happen? */ | ||
| 101 | |||
| 102 | /* We now have | ||
| 103 | * al*bl - t1 | ||
| 104 | * (al-ah)*(bh-bl)+ah*bh - m | ||
| 105 | * ah*bh - h | ||
| 106 | */ | ||
| 107 | #if 0 | ||
| 108 | BN_add(&m,&m,&t1); | ||
| 109 | debug_BN_print(stdout,&t1," s10\n"); | ||
| 110 | debug_BN_print(stdout,&m," s21\n"); | ||
| 111 | debug_BN_print(stdout,&h," s32\n"); | ||
| 112 | BN_lshift(&m,&m,w2*BN_BITS2); | ||
| 113 | BN_lshift(&h,&h,w2*2*BN_BITS2); | ||
| 114 | BN_add(r,&m,&t1); | ||
| 115 | BN_add(r,r,&h); | ||
| 116 | BN_rshift(r,r,w2*2*BN_BITS2); | ||
| 117 | #else | ||
| 118 | BN_add(&m,&m,&t1); /* Do a cmp then +1 if needed? */ | ||
| 119 | bn_set_high(&t3,&t1,w2); | ||
| 120 | BN_add(&m,&m,&t3); | ||
| 121 | bn_set_high(&t3,&m,w2); | ||
| 122 | BN_add(r,&h,&t3); | ||
| 123 | #endif | ||
| 124 | |||
| 125 | #ifdef BN_MUL_HIGH_DEBUG | ||
| 126 | printf("carry=%d\n",borrow); | ||
| 127 | #endif | ||
| 128 | debug_BN_print(stdout,r," ret\n"); | ||
| 129 | BN_free(&t1); BN_free(&t2); | ||
| 130 | BN_free(&m); BN_free(&h); | ||
| 131 | return(1); | ||
| 132 | } | ||
| 133 | |||
| 134 | |||
| 135 | |||
diff --git a/src/lib/libcrypto/bn/old/bn_ka.c b/src/lib/libcrypto/bn/old/bn_ka.c new file mode 100644 index 0000000000..378c94dc5a --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_ka.c | |||
| @@ -0,0 +1,567 @@ | |||
| 1 | #include <stdio.h> | ||
| 2 | #include <stdlib.h> | ||
| 3 | #include <strings.h> | ||
| 4 | #include "bn_lcl.h" | ||
| 5 | |||
| 6 | /* r is 2*n2 words in size, | ||
| 7 | * a and b are both n2 words in size. | ||
| 8 | * n2 must be a power of 2. | ||
| 9 | * We multiply and return the result. | ||
| 10 | * t must be 2*n2 words in size | ||
| 11 | * We calulate | ||
| 12 | * a[0]*b[0] | ||
| 13 | * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) | ||
| 14 | * a[1]*b[1] | ||
| 15 | */ | ||
| 16 | void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | ||
| 17 | BN_ULONG *t) | ||
| 18 | { | ||
| 19 | int n=n2/2; | ||
| 20 | int neg,zero,c1,c2; | ||
| 21 | BN_ULONG ln,lo,*p; | ||
| 22 | |||
| 23 | #ifdef BN_COUNT | ||
| 24 | printf(" bn_mul_recursive %d * %d\n",n2,n2); | ||
| 25 | #endif | ||
| 26 | if (n2 <= 8) | ||
| 27 | { | ||
| 28 | if (n2 == 8) | ||
| 29 | bn_mul_comba8(r,a,b); | ||
| 30 | else | ||
| 31 | bn_mul_normal(r,a,n2,b,n2); | ||
| 32 | return; | ||
| 33 | } | ||
| 34 | |||
| 35 | if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) | ||
| 36 | { | ||
| 37 | /* This should not happen */ | ||
| 38 | /*abort(); */ | ||
| 39 | bn_mul_normal(r,a,n2,b,n2); | ||
| 40 | return; | ||
| 41 | } | ||
| 42 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ | ||
| 43 | c1=bn_cmp_words(a,&(a[n]),n); | ||
| 44 | c2=bn_cmp_words(&(b[n]),b,n); | ||
| 45 | zero=neg=0; | ||
| 46 | switch (c1*3+c2) | ||
| 47 | { | ||
| 48 | case -4: | ||
| 49 | bn_sub_words(t, &(a[n]),a, n); /* - */ | ||
| 50 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ | ||
| 51 | break; | ||
| 52 | case -3: | ||
| 53 | zero=1; | ||
| 54 | break; | ||
| 55 | case -2: | ||
| 56 | bn_sub_words(t, &(a[n]),a, n); /* - */ | ||
| 57 | bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ | ||
| 58 | neg=1; | ||
| 59 | break; | ||
| 60 | case -1: | ||
| 61 | case 0: | ||
| 62 | case 1: | ||
| 63 | zero=1; | ||
| 64 | break; | ||
| 65 | case 2: | ||
| 66 | bn_sub_words(t, a, &(a[n]),n); /* + */ | ||
| 67 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ | ||
| 68 | neg=1; | ||
| 69 | break; | ||
| 70 | case 3: | ||
| 71 | zero=1; | ||
| 72 | break; | ||
| 73 | case 4: | ||
| 74 | bn_sub_words(t, a, &(a[n]),n); | ||
| 75 | bn_sub_words(&(t[n]),&(b[n]),b, n); | ||
| 76 | break; | ||
| 77 | } | ||
| 78 | |||
| 79 | if (n == 8) | ||
| 80 | { | ||
| 81 | if (!zero) | ||
| 82 | bn_mul_comba8(&(t[n2]),t,&(t[n])); | ||
| 83 | else | ||
| 84 | memset(&(t[n2]),0,8*sizeof(BN_ULONG)); | ||
| 85 | |||
| 86 | bn_mul_comba8(r,a,b); | ||
| 87 | bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n])); | ||
| 88 | } | ||
| 89 | else | ||
| 90 | { | ||
| 91 | p= &(t[n2*2]); | ||
| 92 | if (!zero) | ||
| 93 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); | ||
| 94 | else | ||
| 95 | memset(&(t[n2]),0,n*sizeof(BN_ULONG)); | ||
| 96 | bn_mul_recursive(r,a,b,n,p); | ||
| 97 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p); | ||
| 98 | } | ||
| 99 | |||
| 100 | /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign | ||
| 101 | * r[10] holds (a[0]*b[0]) | ||
| 102 | * r[32] holds (b[1]*b[1]) | ||
| 103 | */ | ||
| 104 | |||
| 105 | c1=bn_add_words(t,r,&(r[n2]),n2); | ||
| 106 | |||
| 107 | if (neg) /* if t[32] is negative */ | ||
| 108 | { | ||
| 109 | c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2); | ||
| 110 | } | ||
| 111 | else | ||
| 112 | { | ||
| 113 | /* Might have a carry */ | ||
| 114 | c1+=bn_add_words(&(t[n2]),&(t[n2]),t,n2); | ||
| 115 | } | ||
| 116 | |||
| 117 | /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) | ||
| 118 | * r[10] holds (a[0]*b[0]) | ||
| 119 | * r[32] holds (b[1]*b[1]) | ||
| 120 | * c1 holds the carry bits | ||
| 121 | */ | ||
| 122 | c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2); | ||
| 123 | if (c1) | ||
| 124 | { | ||
| 125 | p= &(r[n+n2]); | ||
| 126 | lo= *p; | ||
| 127 | ln=(lo+c1)&BN_MASK2; | ||
| 128 | *p=ln; | ||
| 129 | |||
| 130 | /* The overflow will stop before we over write | ||
| 131 | * words we should not overwrite */ | ||
| 132 | if (ln < c1) | ||
| 133 | { | ||
| 134 | do { | ||
| 135 | p++; | ||
| 136 | lo= *p; | ||
| 137 | ln=(lo+1)&BN_MASK2; | ||
| 138 | *p=ln; | ||
| 139 | } while (ln == 0); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | /* n+tn is the word length | ||
| 145 | * t needs to be n*4 is size, as does r */ | ||
| 146 | void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, | ||
| 147 | int n, BN_ULONG *t) | ||
| 148 | { | ||
| 149 | int n2=n*2,i,j; | ||
| 150 | int c1; | ||
| 151 | BN_ULONG ln,lo,*p; | ||
| 152 | |||
| 153 | #ifdef BN_COUNT | ||
| 154 | printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n); | ||
| 155 | #endif | ||
| 156 | if (n < 8) | ||
| 157 | { | ||
| 158 | i=tn+n; | ||
| 159 | bn_mul_normal(r,a,i,b,i); | ||
| 160 | return; | ||
| 161 | } | ||
| 162 | |||
| 163 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ | ||
| 164 | bn_sub_words(t, a, &(a[n]),n); /* + */ | ||
| 165 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ | ||
| 166 | |||
| 167 | if (n == 8) | ||
| 168 | { | ||
| 169 | bn_mul_comba8(&(t[n2]),t,&(t[n])); | ||
| 170 | bn_mul_comba8(r,a,b); | ||
| 171 | bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); | ||
| 172 | memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2)); | ||
| 173 | } | ||
| 174 | else | ||
| 175 | { | ||
| 176 | p= &(t[n2*2]); | ||
| 177 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); | ||
| 178 | bn_mul_recursive(r,a,b,n,p); | ||
| 179 | i=n/2; | ||
| 180 | /* If there is only a bottom half to the number, | ||
| 181 | * just do it */ | ||
| 182 | j=tn-i; | ||
| 183 | if (j == 0) | ||
| 184 | { | ||
| 185 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p); | ||
| 186 | memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); | ||
| 187 | } | ||
| 188 | else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ | ||
| 189 | { | ||
| 190 | bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), | ||
| 191 | j,i,p); | ||
| 192 | memset(&(r[n2+tn*2]),0, | ||
| 193 | sizeof(BN_ULONG)*(n2-tn*2)); | ||
| 194 | } | ||
| 195 | else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ | ||
| 196 | { | ||
| 197 | memset(&(r[n2]),0,sizeof(BN_ULONG)*(tn*2)); | ||
| 198 | for (;;) | ||
| 199 | { | ||
| 200 | i/=2; | ||
| 201 | if (i < tn) | ||
| 202 | { | ||
| 203 | bn_mul_part_recursive(&(r[n2]), | ||
| 204 | &(a[n]),&(b[n]), | ||
| 205 | tn-i,i,p); | ||
| 206 | break; | ||
| 207 | } | ||
| 208 | else if (i == tn) | ||
| 209 | { | ||
| 210 | bn_mul_recursive(&(r[n2]), | ||
| 211 | &(a[n]),&(b[n]), | ||
| 212 | i,p); | ||
| 213 | break; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | } | ||
| 217 | } | ||
| 218 | |||
| 219 | /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign | ||
| 220 | * r[10] holds (a[0]*b[0]) | ||
| 221 | * r[32] holds (b[1]*b[1]) | ||
| 222 | */ | ||
| 223 | |||
| 224 | c1=bn_add_words(t,r,&(r[n2]),n2); | ||
| 225 | c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2); | ||
| 226 | |||
| 227 | /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) | ||
| 228 | * r[10] holds (a[0]*b[0]) | ||
| 229 | * r[32] holds (b[1]*b[1]) | ||
| 230 | * c1 holds the carry bits | ||
| 231 | */ | ||
| 232 | c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2); | ||
| 233 | if (c1) | ||
| 234 | { | ||
| 235 | p= &(r[n+n2]); | ||
| 236 | lo= *p; | ||
| 237 | ln=(lo+c1)&BN_MASK2; | ||
| 238 | *p=ln; | ||
| 239 | |||
| 240 | /* The overflow will stop before we over write | ||
| 241 | * words we should not overwrite */ | ||
| 242 | if (ln < c1) | ||
| 243 | { | ||
| 244 | do { | ||
| 245 | p++; | ||
| 246 | lo= *p; | ||
| 247 | ln=(lo+1)&BN_MASK2; | ||
| 248 | *p=ln; | ||
| 249 | } while (ln == 0); | ||
| 250 | } | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | /* r is 2*n words in size, | ||
| 255 | * a and b are both n words in size. | ||
| 256 | * n must be a power of 2. | ||
| 257 | * We multiply and return the result. | ||
| 258 | * t must be 2*n words in size | ||
| 259 | * We calulate | ||
| 260 | * a[0]*b[0] | ||
| 261 | * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) | ||
| 262 | * a[1]*b[1] | ||
| 263 | */ | ||
| 264 | void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *t) | ||
| 265 | { | ||
| 266 | int n=n2/2; | ||
| 267 | int zero,c1; | ||
| 268 | BN_ULONG ln,lo,*p; | ||
| 269 | |||
| 270 | #ifdef BN_COUNT | ||
| 271 | printf(" bn_sqr_recursive %d * %d\n",n2,n2); | ||
| 272 | #endif | ||
| 273 | if (n2 == 4) | ||
| 274 | { | ||
| 275 | bn_sqr_comba4(r,a); | ||
| 276 | return; | ||
| 277 | } | ||
| 278 | else if (n2 == 8) | ||
| 279 | { | ||
| 280 | bn_sqr_comba8(r,a); | ||
| 281 | return; | ||
| 282 | } | ||
| 283 | if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) | ||
| 284 | { | ||
| 285 | bn_sqr_normal(r,a,n2,t); | ||
| 286 | return; | ||
| 287 | abort(); | ||
| 288 | } | ||
| 289 | /* r=(a[0]-a[1])*(a[1]-a[0]) */ | ||
| 290 | c1=bn_cmp_words(a,&(a[n]),n); | ||
| 291 | zero=0; | ||
| 292 | if (c1 > 0) | ||
| 293 | bn_sub_words(t,a,&(a[n]),n); | ||
| 294 | else if (c1 < 0) | ||
| 295 | bn_sub_words(t,&(a[n]),a,n); | ||
| 296 | else | ||
| 297 | zero=1; | ||
| 298 | |||
| 299 | /* The result will always be negative unless it is zero */ | ||
| 300 | |||
| 301 | if (n == 8) | ||
| 302 | { | ||
| 303 | if (!zero) | ||
| 304 | bn_sqr_comba8(&(t[n2]),t); | ||
| 305 | else | ||
| 306 | memset(&(t[n2]),0,8*sizeof(BN_ULONG)); | ||
| 307 | |||
| 308 | bn_sqr_comba8(r,a); | ||
| 309 | bn_sqr_comba8(&(r[n2]),&(a[n])); | ||
| 310 | } | ||
| 311 | else | ||
| 312 | { | ||
| 313 | p= &(t[n2*2]); | ||
| 314 | if (!zero) | ||
| 315 | bn_sqr_recursive(&(t[n2]),t,n,p); | ||
| 316 | else | ||
| 317 | memset(&(t[n2]),0,n*sizeof(BN_ULONG)); | ||
| 318 | bn_sqr_recursive(r,a,n,p); | ||
| 319 | bn_sqr_recursive(&(r[n2]),&(a[n]),n,p); | ||
| 320 | } | ||
| 321 | |||
| 322 | /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero | ||
| 323 | * r[10] holds (a[0]*b[0]) | ||
| 324 | * r[32] holds (b[1]*b[1]) | ||
| 325 | */ | ||
| 326 | |||
| 327 | c1=bn_add_words(t,r,&(r[n2]),n2); | ||
| 328 | |||
| 329 | /* t[32] is negative */ | ||
| 330 | c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2); | ||
| 331 | |||
| 332 | /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1]) | ||
| 333 | * r[10] holds (a[0]*a[0]) | ||
| 334 | * r[32] holds (a[1]*a[1]) | ||
| 335 | * c1 holds the carry bits | ||
| 336 | */ | ||
| 337 | c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2); | ||
| 338 | if (c1) | ||
| 339 | { | ||
| 340 | p= &(r[n+n2]); | ||
| 341 | lo= *p; | ||
| 342 | ln=(lo+c1)&BN_MASK2; | ||
| 343 | *p=ln; | ||
| 344 | |||
| 345 | /* The overflow will stop before we over write | ||
| 346 | * words we should not overwrite */ | ||
| 347 | if (ln < c1) | ||
| 348 | { | ||
| 349 | do { | ||
| 350 | p++; | ||
| 351 | lo= *p; | ||
| 352 | ln=(lo+1)&BN_MASK2; | ||
| 353 | *p=ln; | ||
| 354 | } while (ln == 0); | ||
| 355 | } | ||
| 356 | } | ||
| 357 | } | ||
| 358 | |||
| 359 | #if 1 | ||
| 360 | /* a and b must be the same size, which is n2. | ||
| 361 | * r needs to be n2 words and t needs to be n2*2 | ||
| 362 | */ | ||
| 363 | void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | ||
| 364 | BN_ULONG *t) | ||
| 365 | { | ||
| 366 | int n=n2/2; | ||
| 367 | |||
| 368 | #ifdef BN_COUNT | ||
| 369 | printf(" bn_mul_low_recursive %d * %d\n",n2,n2); | ||
| 370 | #endif | ||
| 371 | |||
| 372 | bn_mul_recursive(r,a,b,n,&(t[0])); | ||
| 373 | if (n > BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) | ||
| 374 | { | ||
| 375 | bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2])); | ||
| 376 | bn_add_words(&(r[n]),&(r[n]),&(t[0]),n); | ||
| 377 | bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2])); | ||
| 378 | bn_add_words(&(r[n]),&(r[n]),&(t[0]),n); | ||
| 379 | } | ||
| 380 | else | ||
| 381 | { | ||
| 382 | bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n); | ||
| 383 | bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n); | ||
| 384 | bn_add_words(&(r[n]),&(r[n]),&(t[0]),n); | ||
| 385 | bn_add_words(&(r[n]),&(r[n]),&(t[n]),n); | ||
| 386 | } | ||
| 387 | } | ||
| 388 | |||
| 389 | /* a and b must be the same size, which is n2. | ||
| 390 | * r needs to be n2 words and t needs to be n2*2 | ||
| 391 | * l is the low words of the output. | ||
| 392 | * t needs to be n2*3 | ||
| 393 | */ | ||
| 394 | void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, | ||
| 395 | BN_ULONG *t) | ||
| 396 | { | ||
| 397 | int j,i,n,c1,c2; | ||
| 398 | int neg,oneg,zero; | ||
| 399 | BN_ULONG ll,lc,*lp,*mp; | ||
| 400 | |||
| 401 | #ifdef BN_COUNT | ||
| 402 | printf(" bn_mul_high %d * %d\n",n2,n2); | ||
| 403 | #endif | ||
| 404 | n=(n2+1)/2; | ||
| 405 | |||
| 406 | /* Calculate (al-ah)*(bh-bl) */ | ||
| 407 | neg=zero=0; | ||
| 408 | c1=bn_cmp_words(&(a[0]),&(a[n]),n); | ||
| 409 | c2=bn_cmp_words(&(b[n]),&(b[0]),n); | ||
| 410 | switch (c1*3+c2) | ||
| 411 | { | ||
| 412 | case -4: | ||
| 413 | bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n); | ||
| 414 | bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n); | ||
| 415 | break; | ||
| 416 | case -3: | ||
| 417 | zero=1; | ||
| 418 | break; | ||
| 419 | case -2: | ||
| 420 | bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n); | ||
| 421 | bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n); | ||
| 422 | neg=1; | ||
| 423 | break; | ||
| 424 | case -1: | ||
| 425 | case 0: | ||
| 426 | case 1: | ||
| 427 | zero=1; | ||
| 428 | break; | ||
| 429 | case 2: | ||
| 430 | bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n); | ||
| 431 | bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n); | ||
| 432 | neg=1; | ||
| 433 | break; | ||
| 434 | case 3: | ||
| 435 | zero=1; | ||
| 436 | break; | ||
| 437 | case 4: | ||
| 438 | bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n); | ||
| 439 | bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n); | ||
| 440 | break; | ||
| 441 | } | ||
| 442 | |||
| 443 | oneg=neg; | ||
| 444 | /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */ | ||
| 445 | bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2])); | ||
| 446 | /* r[10] = (a[1]*b[1]) */ | ||
| 447 | bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2])); | ||
| 448 | |||
| 449 | /* s0 == low(al*bl) | ||
| 450 | * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl) | ||
| 451 | * We know s0 and s1 so the only unknown is high(al*bl) | ||
| 452 | * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl)) | ||
| 453 | * high(al*bl) == s1 - (r[0]+l[0]+t[0]) | ||
| 454 | */ | ||
| 455 | if (l != NULL) | ||
| 456 | { | ||
| 457 | lp= &(t[n2+n]); | ||
| 458 | c1=bn_add_words(lp,&(r[0]),&(l[0]),n); | ||
| 459 | } | ||
| 460 | else | ||
| 461 | { | ||
| 462 | c1=0; | ||
| 463 | lp= &(r[0]); | ||
| 464 | } | ||
| 465 | |||
| 466 | if (neg) | ||
| 467 | neg=bn_sub_words(&(t[n2]),lp,&(t[0]),n); | ||
| 468 | else | ||
| 469 | { | ||
| 470 | bn_add_words(&(t[n2]),lp,&(t[0]),n); | ||
| 471 | neg=0; | ||
| 472 | } | ||
| 473 | |||
| 474 | if (l != NULL) | ||
| 475 | { | ||
| 476 | bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n); | ||
| 477 | } | ||
| 478 | else | ||
| 479 | { | ||
| 480 | lp= &(t[n2+n]); | ||
| 481 | mp= &(t[n2]); | ||
| 482 | for (i=0; i<n; i++) | ||
| 483 | lp[i]=((~mp[i])+1)&BN_MASK2; | ||
| 484 | } | ||
| 485 | |||
| 486 | /* s[0] = low(al*bl) | ||
| 487 | * t[3] = high(al*bl) | ||
| 488 | * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign | ||
| 489 | * r[10] = (a[1]*b[1]) | ||
| 490 | */ | ||
| 491 | /* R[10] = al*bl | ||
| 492 | * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0]) | ||
| 493 | * R[32] = ah*bh | ||
| 494 | */ | ||
| 495 | /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow) | ||
| 496 | * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow) | ||
| 497 | * R[3]=r[1]+(carry/borrow) | ||
| 498 | */ | ||
| 499 | if (l != NULL) | ||
| 500 | { | ||
| 501 | lp= &(t[n2]); | ||
| 502 | c1= bn_add_words(lp,&(t[n2+n]),&(l[0]),n); | ||
| 503 | } | ||
| 504 | else | ||
| 505 | { | ||
| 506 | lp= &(t[n2+n]); | ||
| 507 | c1=0; | ||
| 508 | } | ||
| 509 | c1+=bn_add_words(&(t[n2]),lp, &(r[0]),n); | ||
| 510 | if (oneg) | ||
| 511 | c1-=bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n); | ||
| 512 | else | ||
| 513 | c1+=bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n); | ||
| 514 | |||
| 515 | c2 =bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n); | ||
| 516 | c2+=bn_add_words(&(r[0]),&(r[0]),&(r[n]),n); | ||
| 517 | if (oneg) | ||
| 518 | c2-=bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n); | ||
| 519 | else | ||
| 520 | c2+=bn_add_words(&(r[0]),&(r[0]),&(t[n]),n); | ||
| 521 | |||
| 522 | if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */ | ||
| 523 | { | ||
| 524 | i=0; | ||
| 525 | if (c1 > 0) | ||
| 526 | { | ||
| 527 | lc=c1; | ||
| 528 | do { | ||
| 529 | ll=(r[i]+lc)&BN_MASK2; | ||
| 530 | r[i++]=ll; | ||
| 531 | lc=(lc > ll); | ||
| 532 | } while (lc); | ||
| 533 | } | ||
| 534 | else | ||
| 535 | { | ||
| 536 | lc= -c1; | ||
| 537 | do { | ||
| 538 | ll=r[i]; | ||
| 539 | r[i++]=(ll-lc)&BN_MASK2; | ||
| 540 | lc=(lc > ll); | ||
| 541 | } while (lc); | ||
| 542 | } | ||
| 543 | } | ||
| 544 | if (c2 != 0) /* Add starting at r[1] */ | ||
| 545 | { | ||
| 546 | i=n; | ||
| 547 | if (c2 > 0) | ||
| 548 | { | ||
| 549 | lc=c2; | ||
| 550 | do { | ||
| 551 | ll=(r[i]+lc)&BN_MASK2; | ||
| 552 | r[i++]=ll; | ||
| 553 | lc=(lc > ll); | ||
| 554 | } while (lc); | ||
| 555 | } | ||
| 556 | else | ||
| 557 | { | ||
| 558 | lc= -c2; | ||
| 559 | do { | ||
| 560 | ll=r[i]; | ||
| 561 | r[i++]=(ll-lc)&BN_MASK2; | ||
| 562 | lc=(lc > ll); | ||
| 563 | } while (lc); | ||
| 564 | } | ||
| 565 | } | ||
| 566 | } | ||
| 567 | #endif | ||
diff --git a/src/lib/libcrypto/bn/old/bn_low.c b/src/lib/libcrypto/bn/old/bn_low.c new file mode 100644 index 0000000000..cbc406751c --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_low.c | |||
| @@ -0,0 +1,194 @@ | |||
| 1 | /* crypto/bn/bn_mul.c */ | ||
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
| 3 | * All rights reserved. | ||
| 4 | * | ||
| 5 | * This package is an SSL implementation written | ||
| 6 | * by Eric Young (eay@cryptsoft.com). | ||
| 7 | * The implementation was written so as to conform with Netscapes SSL. | ||
| 8 | * | ||
| 9 | * This library is free for commercial and non-commercial use as long as | ||
| 10 | * the following conditions are aheared to. The following conditions | ||
| 11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
| 12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
| 13 | * included with this distribution is covered by the same copyright terms | ||
| 14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
| 15 | * | ||
| 16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
| 17 | * the code are not to be removed. | ||
| 18 | * If this package is used in a product, Eric Young should be given attribution | ||
| 19 | * as the author of the parts of the library used. | ||
| 20 | * This can be in the form of a textual message at program startup or | ||
| 21 | * in documentation (online or textual) provided with the package. | ||
| 22 | * | ||
| 23 | * Redistribution and use in source and binary forms, with or without | ||
| 24 | * modification, are permitted provided that the following conditions | ||
| 25 | * are met: | ||
| 26 | * 1. Redistributions of source code must retain the copyright | ||
| 27 | * notice, this list of conditions and the following disclaimer. | ||
| 28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 29 | * notice, this list of conditions and the following disclaimer in the | ||
| 30 | * documentation and/or other materials provided with the distribution. | ||
| 31 | * 3. All advertising materials mentioning features or use of this software | ||
| 32 | * must display the following acknowledgement: | ||
| 33 | * "This product includes cryptographic software written by | ||
| 34 | * Eric Young (eay@cryptsoft.com)" | ||
| 35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
| 36 | * being used are not cryptographic related :-). | ||
| 37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
| 38 | * the apps directory (application code) you must include an acknowledgement: | ||
| 39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
| 40 | * | ||
| 41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
| 42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
| 45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
| 47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
| 49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
| 50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| 51 | * SUCH DAMAGE. | ||
| 52 | * | ||
| 53 | * The licence and distribution terms for any publically available version or | ||
| 54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
| 55 | * copied and put under another distribution licence | ||
| 56 | * [including the GNU Public Licence.] | ||
| 57 | */ | ||
| 58 | |||
| 59 | #include <stdio.h> | ||
| 60 | #include "cryptlib.h" | ||
| 61 | #include "bn_lcl.h" | ||
| 62 | |||
| 63 | static int bn_mm_low(BIGNUM *m,BIGNUM *A,BIGNUM *B, int num, | ||
| 64 | BIGNUM *sk,BN_CTX *ctx); | ||
| 65 | int BN_mul_low(BIGNUM *r, BIGNUM *a, BIGNUM *b,int words); | ||
| 66 | |||
| 67 | /* r must be different to a and b */ | ||
| 68 | int BN_mul_low(BIGNUM *r, BIGNUM *a, BIGNUM *b, int num) | ||
| 69 | { | ||
| 70 | BN_ULONG *ap,*bp,*rp; | ||
| 71 | BIGNUM *sk; | ||
| 72 | int j,i,n,ret; | ||
| 73 | int max,al,bl; | ||
| 74 | BN_CTX ctx; | ||
| 75 | |||
| 76 | bn_check_top(a); | ||
| 77 | bn_check_top(b); | ||
| 78 | |||
| 79 | #ifdef BN_MUL_DEBUG | ||
| 80 | printf("BN_mul_low(%d,%d,%d)\n",a->top,b->top,num); | ||
| 81 | #endif | ||
| 82 | |||
| 83 | al=a->top; | ||
| 84 | bl=b->top; | ||
| 85 | if ((al == 0) || (bl == 0)) | ||
| 86 | { | ||
| 87 | r->top=0; | ||
| 88 | return(1); | ||
| 89 | } | ||
| 90 | |||
| 91 | if ((bn_limit_bits_low > 0) && (num > bn_limit_num_low)) | ||
| 92 | { | ||
| 93 | n=BN_num_bits_word(num*2)-bn_limit_bits_low; | ||
| 94 | n*=2; | ||
| 95 | sk=(BIGNUM *)Malloc(sizeof(BIGNUM)*n); | ||
| 96 | memset(sk,0,sizeof(BIGNUM)*n); | ||
| 97 | memset(&ctx,0,sizeof(ctx)); | ||
| 98 | |||
| 99 | ret=bn_mm_low(r,a,b,num,&(sk[0]),&ctx); | ||
| 100 | for (i=0; i<n; i+=2) | ||
| 101 | { | ||
| 102 | BN_clear_free(&sk[i]); | ||
| 103 | BN_clear_free(&sk[i+1]); | ||
| 104 | } | ||
| 105 | Free(sk); | ||
| 106 | return(ret); | ||
| 107 | } | ||
| 108 | |||
| 109 | max=(al+bl); | ||
| 110 | if (bn_wexpand(r,max) == NULL) return(0); | ||
| 111 | r->neg=a->neg^b->neg; | ||
| 112 | ap=a->d; | ||
| 113 | bp=b->d; | ||
| 114 | rp=r->d; | ||
| 115 | r->top=(max > num)?num:max; | ||
| 116 | |||
| 117 | rp[al]=bn_mul_words(rp,ap,al,*(bp++)); | ||
| 118 | rp++; | ||
| 119 | j=bl; | ||
| 120 | for (i=1; i<j; i++) | ||
| 121 | { | ||
| 122 | if (al >= num--) | ||
| 123 | { | ||
| 124 | al--; | ||
| 125 | if (al <= 0) break; | ||
| 126 | } | ||
| 127 | rp[al]=bn_mul_add_words(rp,ap,al,*(bp++)); | ||
| 128 | rp++; | ||
| 129 | } | ||
| 130 | |||
| 131 | while ((r->top > 0) && (r->d[r->top-1] == 0)) | ||
| 132 | r->top--; | ||
| 133 | return(1); | ||
| 134 | } | ||
| 135 | |||
| 136 | |||
| 137 | #define t1 (sk[0]) | ||
| 138 | #define t2 (sk[1]) | ||
| 139 | |||
| 140 | /* r must be different to a and b */ | ||
| 141 | int bn_mm_low(BIGNUM *m, BIGNUM *A, BIGNUM *B, int num, BIGNUM *sk, | ||
| 142 | BN_CTX *ctx) | ||
| 143 | { | ||
| 144 | int n; /* ,sqr=0; */ | ||
| 145 | int an,bn; | ||
| 146 | BIGNUM ah,al,bh,bl; | ||
| 147 | |||
| 148 | bn_wexpand(m,num+3); | ||
| 149 | an=A->top; | ||
| 150 | bn=B->top; | ||
| 151 | |||
| 152 | #ifdef BN_MUL_DEBUG | ||
| 153 | printf("bn_mm_low(%d,%d,%d)\n",A->top,B->top,num); | ||
| 154 | #endif | ||
| 155 | |||
| 156 | n=(num+1)/2; | ||
| 157 | |||
| 158 | BN_init(&ah); BN_init(&al); BN_init(&bh); BN_init(&bl); | ||
| 159 | |||
| 160 | bn_set_low( &al,A,n); | ||
| 161 | bn_set_high(&ah,A,n); | ||
| 162 | bn_set_low( &bl,B,n); | ||
| 163 | bn_set_high(&bh,B,n); | ||
| 164 | |||
| 165 | if (num <= (bn_limit_num_low+bn_limit_num_low)) | ||
| 166 | { | ||
| 167 | BN_mul(m,&al,&bl); | ||
| 168 | BN_mul_low(&t1,&al,&bh,n); | ||
| 169 | BN_mul_low(&t2,&ah,&bl,n); | ||
| 170 | } | ||
| 171 | else | ||
| 172 | { | ||
| 173 | bn_mm(m ,&al,&bl,&(sk[2]),ctx); | ||
| 174 | bn_mm_low(&t1,&al,&bh,n,&(sk[2]),ctx); | ||
| 175 | bn_mm_low(&t2,&ah,&bl,n,&(sk[2]),ctx); | ||
| 176 | } | ||
| 177 | |||
| 178 | BN_add(&t1,&t1,&t2); | ||
| 179 | |||
| 180 | /* We will now do an evil hack instead of | ||
| 181 | * BN_lshift(&t1,&t1,n*BN_BITS2); | ||
| 182 | * BN_add(m,m,&t1); | ||
| 183 | * BN_mask_bits(m,num*BN_BITS2); | ||
| 184 | */ | ||
| 185 | bn_set_high(&ah,m,n); ah.max=num+2; | ||
| 186 | BN_add(&ah,&ah,&t1); | ||
| 187 | m->top=num; | ||
| 188 | |||
| 189 | m->neg=A->neg^B->neg; | ||
| 190 | return(1); | ||
| 191 | } | ||
| 192 | |||
| 193 | #undef t1 (sk[0]) | ||
| 194 | #undef t2 (sk[1]) | ||
diff --git a/src/lib/libcrypto/bn/old/bn_m.c b/src/lib/libcrypto/bn/old/bn_m.c new file mode 100644 index 0000000000..522beb02bc --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_m.c | |||
| @@ -0,0 +1,139 @@ | |||
| 1 | /* crypto/bn/bn_m.c */ | ||
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
| 3 | * All rights reserved. | ||
| 4 | * | ||
| 5 | * This package is an SSL implementation written | ||
| 6 | * by Eric Young (eay@cryptsoft.com). | ||
| 7 | * The implementation was written so as to conform with Netscapes SSL. | ||
| 8 | * | ||
| 9 | * This library is free for commercial and non-commercial use as long as | ||
| 10 | * the following conditions are aheared to. The following conditions | ||
| 11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
| 12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
| 13 | * included with this distribution is covered by the same copyright terms | ||
| 14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
| 15 | * | ||
| 16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
| 17 | * the code are not to be removed. | ||
| 18 | * If this package is used in a product, Eric Young should be given attribution | ||
| 19 | * as the author of the parts of the library used. | ||
| 20 | * This can be in the form of a textual message at program startup or | ||
| 21 | * in documentation (online or textual) provided with the package. | ||
| 22 | * | ||
| 23 | * Redistribution and use in source and binary forms, with or without | ||
| 24 | * modification, are permitted provided that the following conditions | ||
| 25 | * are met: | ||
| 26 | * 1. Redistributions of source code must retain the copyright | ||
| 27 | * notice, this list of conditions and the following disclaimer. | ||
| 28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 29 | * notice, this list of conditions and the following disclaimer in the | ||
| 30 | * documentation and/or other materials provided with the distribution. | ||
| 31 | * 3. All advertising materials mentioning features or use of this software | ||
| 32 | * must display the following acknowledgement: | ||
| 33 | * "This product includes cryptographic software written by | ||
| 34 | * Eric Young (eay@cryptsoft.com)" | ||
| 35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
| 36 | * being used are not cryptographic related :-). | ||
| 37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
| 38 | * the apps directory (application code) you must include an acknowledgement: | ||
| 39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
| 40 | * | ||
| 41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
| 42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
| 45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
| 47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
| 49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
| 50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| 51 | * SUCH DAMAGE. | ||
| 52 | * | ||
| 53 | * The licence and distribution terms for any publically available version or | ||
| 54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
| 55 | * copied and put under another distribution licence | ||
| 56 | * [including the GNU Public Licence.] | ||
| 57 | */ | ||
| 58 | |||
| 59 | #include <stdio.h> | ||
| 60 | /*#include "cryptlib.h"*/ | ||
| 61 | #include "bn_lcl.h" | ||
| 62 | |||
| 63 | #define limit_bits 5 /* 2^5, or 32 words */ | ||
| 64 | #define limit_num (1<<limit_bits) | ||
| 65 | |||
| 66 | int BN_m(BIGNUM *r, BIGNUM *a, BIGNUM *b) | ||
| 67 | { | ||
| 68 | BIGNUM *sk; | ||
| 69 | int i,n; | ||
| 70 | |||
| 71 | n=(BN_num_bits_word(a->top|b->top)-limit_bits); | ||
| 72 | n*=2; | ||
| 73 | sk=(BIGNUM *)malloc(sizeof(BIGNUM)*n); | ||
| 74 | for (i=0; i<n; i++) | ||
| 75 | BN_init(&(sk[i])); | ||
| 76 | |||
| 77 | return(BN_mm(r,a,b,&(sk[0]))); | ||
| 78 | } | ||
| 79 | |||
| 80 | #define ahal (sk[0]) | ||
| 81 | #define blbh (sk[1]) | ||
| 82 | |||
| 83 | /* r must be different to a and b */ | ||
| 84 | int BN_mm(BIGNUM *m, BIGNUM *A, BIGNUM *B, BIGNUM *sk) | ||
| 85 | { | ||
| 86 | int i,num,anum,bnum; | ||
| 87 | int an,bn; | ||
| 88 | BIGNUM ah,al,bh,bl; | ||
| 89 | |||
| 90 | an=A->top; | ||
| 91 | bn=B->top; | ||
| 92 | if ((an <= limit_num) || (bn <= limit_num)) | ||
| 93 | { | ||
| 94 | return(BN_mul(m,A,B)); | ||
| 95 | } | ||
| 96 | |||
| 97 | anum=(an>bn)?an:bn; | ||
| 98 | num=(anum)/2; | ||
| 99 | |||
| 100 | /* Are going to now chop things into 'num' word chunks. */ | ||
| 101 | bnum=num*BN_BITS2; | ||
| 102 | |||
| 103 | BN_init(&ahal); | ||
| 104 | BN_init(&blbh); | ||
| 105 | BN_init(&ah); | ||
| 106 | BN_init(&al); | ||
| 107 | BN_init(&bh); | ||
| 108 | BN_init(&bl); | ||
| 109 | |||
| 110 | al.top=num; | ||
| 111 | al.d=A->d; | ||
| 112 | ah.top=A->top-num; | ||
| 113 | ah.d= &(A->d[num]); | ||
| 114 | |||
| 115 | bl.top=num; | ||
| 116 | bl.d=B->d; | ||
| 117 | bh.top=B->top-num; | ||
| 118 | bh.d= &(B->d[num]); | ||
| 119 | |||
| 120 | BN_sub(&ahal,&ah,&al); | ||
| 121 | BN_sub(&blbh,&bl,&bh); | ||
| 122 | |||
| 123 | BN_mm(m,&ahal,&blbh,&(sk[2])); | ||
| 124 | BN_mm(&ahal,&al,&bl,&(sk[2])); | ||
| 125 | BN_mm(&blbh,&ah,&bh,&(sk[2])); | ||
| 126 | |||
| 127 | BN_add(m,m,&ahal); | ||
| 128 | BN_add(m,m,&blbh); | ||
| 129 | |||
| 130 | BN_lshift(m,m,bnum); | ||
| 131 | BN_add(m,m,&ahal); | ||
| 132 | |||
| 133 | BN_lshift(&blbh,&blbh,bnum*2); | ||
| 134 | BN_add(m,m,&blbh); | ||
| 135 | |||
| 136 | m->neg=A->neg^B->neg; | ||
| 137 | return(1); | ||
| 138 | } | ||
| 139 | |||
diff --git a/src/lib/libcrypto/bn/old/bn_mul.c.works b/src/lib/libcrypto/bn/old/bn_mul.c.works new file mode 100644 index 0000000000..6d565d44a2 --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_mul.c.works | |||
| @@ -0,0 +1,219 @@ | |||
| 1 | /* crypto/bn/bn_mul.c */ | ||
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
| 3 | * All rights reserved. | ||
| 4 | * | ||
| 5 | * This package is an SSL implementation written | ||
| 6 | * by Eric Young (eay@cryptsoft.com). | ||
| 7 | * The implementation was written so as to conform with Netscapes SSL. | ||
| 8 | * | ||
| 9 | * This library is free for commercial and non-commercial use as long as | ||
| 10 | * the following conditions are aheared to. The following conditions | ||
| 11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
| 12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
| 13 | * included with this distribution is covered by the same copyright terms | ||
| 14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
| 15 | * | ||
| 16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
| 17 | * the code are not to be removed. | ||
| 18 | * If this package is used in a product, Eric Young should be given attribution | ||
| 19 | * as the author of the parts of the library used. | ||
| 20 | * This can be in the form of a textual message at program startup or | ||
| 21 | * in documentation (online or textual) provided with the package. | ||
| 22 | * | ||
| 23 | * Redistribution and use in source and binary forms, with or without | ||
| 24 | * modification, are permitted provided that the following conditions | ||
| 25 | * are met: | ||
| 26 | * 1. Redistributions of source code must retain the copyright | ||
| 27 | * notice, this list of conditions and the following disclaimer. | ||
| 28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 29 | * notice, this list of conditions and the following disclaimer in the | ||
| 30 | * documentation and/or other materials provided with the distribution. | ||
| 31 | * 3. All advertising materials mentioning features or use of this software | ||
| 32 | * must display the following acknowledgement: | ||
| 33 | * "This product includes cryptographic software written by | ||
| 34 | * Eric Young (eay@cryptsoft.com)" | ||
| 35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
| 36 | * being used are not cryptographic related :-). | ||
| 37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
| 38 | * the apps directory (application code) you must include an acknowledgement: | ||
| 39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
| 40 | * | ||
| 41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
| 42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
| 45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
| 47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
| 49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
| 50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| 51 | * SUCH DAMAGE. | ||
| 52 | * | ||
| 53 | * The licence and distribution terms for any publically available version or | ||
| 54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
| 55 | * copied and put under another distribution licence | ||
| 56 | * [including the GNU Public Licence.] | ||
| 57 | */ | ||
| 58 | |||
| 59 | #include <stdio.h> | ||
| 60 | #include "cryptlib.h" | ||
| 61 | #include "bn_lcl.h" | ||
| 62 | |||
| 63 | int bn_mm(BIGNUM *m,BIGNUM *A,BIGNUM *B, BIGNUM *sk,BN_CTX *ctx); | ||
| 64 | |||
| 65 | /* r must be different to a and b */ | ||
| 66 | int BN_mul(r, a, b) | ||
| 67 | BIGNUM *r; | ||
| 68 | BIGNUM *a; | ||
| 69 | BIGNUM *b; | ||
| 70 | { | ||
| 71 | BN_ULONG *ap,*bp,*rp; | ||
| 72 | BIGNUM *sk; | ||
| 73 | int i,n,ret; | ||
| 74 | int max,al,bl; | ||
| 75 | BN_CTX ctx; | ||
| 76 | |||
| 77 | bn_check_top(a); | ||
| 78 | bn_check_top(b); | ||
| 79 | |||
| 80 | al=a->top; | ||
| 81 | bl=b->top; | ||
| 82 | if ((al == 0) || (bl == 0)) | ||
| 83 | { | ||
| 84 | r->top=0; | ||
| 85 | return(1); | ||
| 86 | } | ||
| 87 | #ifdef BN_MUL_DEBUG | ||
| 88 | printf("BN_mul(%d,%d)\n",a->top,b->top); | ||
| 89 | #endif | ||
| 90 | |||
| 91 | #ifdef BN_RECURSION | ||
| 92 | if ( (bn_limit_bits > 0) && | ||
| 93 | (bl > bn_limit_num) && (al > bn_limit_num)) | ||
| 94 | { | ||
| 95 | n=(BN_num_bits_word(al|bl)-bn_limit_bits); | ||
| 96 | n*=2; | ||
| 97 | sk=(BIGNUM *)Malloc(sizeof(BIGNUM)*n); | ||
| 98 | memset(sk,0,sizeof(BIGNUM)*n); | ||
| 99 | memset(&ctx,0,sizeof(ctx)); | ||
| 100 | |||
| 101 | ret=bn_mm(r,a,b,&(sk[0]),&ctx); | ||
| 102 | for (i=0; i<n; i+=2) | ||
| 103 | { | ||
| 104 | BN_clear_free(&sk[i]); | ||
| 105 | BN_clear_free(&sk[i+1]); | ||
| 106 | } | ||
| 107 | Free(sk); | ||
| 108 | return(ret); | ||
| 109 | } | ||
| 110 | #endif | ||
| 111 | |||
| 112 | max=(al+bl); | ||
| 113 | if (bn_wexpand(r,max) == NULL) return(0); | ||
| 114 | r->top=max; | ||
| 115 | r->neg=a->neg^b->neg; | ||
| 116 | ap=a->d; | ||
| 117 | bp=b->d; | ||
| 118 | rp=r->d; | ||
| 119 | |||
| 120 | #ifdef BN_RECURSION | ||
| 121 | if ((al == bl) && (al == 8)) | ||
| 122 | { | ||
| 123 | bn_mul_comba8(rp,ap,bp); | ||
| 124 | } | ||
| 125 | else | ||
| 126 | #endif | ||
| 127 | { | ||
| 128 | rp[al]=bn_mul_words(rp,ap,al,*(bp++)); | ||
| 129 | rp++; | ||
| 130 | for (i=1; i<bl; i++) | ||
| 131 | { | ||
| 132 | rp[al]=bn_mul_add_words(rp,ap,al,*(bp++)); | ||
| 133 | rp++; | ||
| 134 | } | ||
| 135 | } | ||
| 136 | if ((max > 0) && (r->d[max-1] == 0)) r->top--; | ||
| 137 | return(1); | ||
| 138 | } | ||
| 139 | |||
| 140 | #ifdef BN_RECURSION | ||
| 141 | |||
| 142 | #define ahal (sk[0]) | ||
| 143 | #define blbh (sk[1]) | ||
| 144 | |||
| 145 | /* r must be different to a and b */ | ||
| 146 | int bn_mm(m, A, B, sk,ctx) | ||
| 147 | BIGNUM *m,*A,*B; | ||
| 148 | BIGNUM *sk; | ||
| 149 | BN_CTX *ctx; | ||
| 150 | { | ||
| 151 | int n,num,sqr=0; | ||
| 152 | int an,bn; | ||
| 153 | BIGNUM ah,al,bh,bl; | ||
| 154 | |||
| 155 | an=A->top; | ||
| 156 | bn=B->top; | ||
| 157 | #ifdef BN_MUL_DEBUG | ||
| 158 | printf("bn_mm(%d,%d)\n",A->top,B->top); | ||
| 159 | #endif | ||
| 160 | |||
| 161 | if (A == B) sqr=1; | ||
| 162 | num=(an>bn)?an:bn; | ||
| 163 | n=(num+1)/2; | ||
| 164 | /* Are going to now chop things into 'num' word chunks. */ | ||
| 165 | |||
| 166 | BN_init(&ah); | ||
| 167 | BN_init(&al); | ||
| 168 | BN_init(&bh); | ||
| 169 | BN_init(&bl); | ||
| 170 | |||
| 171 | bn_set_low (&al,A,n); | ||
| 172 | bn_set_high(&ah,A,n); | ||
| 173 | bn_set_low (&bl,B,n); | ||
| 174 | bn_set_high(&bh,B,n); | ||
| 175 | |||
| 176 | BN_sub(&ahal,&ah,&al); | ||
| 177 | BN_sub(&blbh,&bl,&bh); | ||
| 178 | |||
| 179 | if (num <= (bn_limit_num+bn_limit_num)) | ||
| 180 | { | ||
| 181 | BN_mul(m,&ahal,&blbh); | ||
| 182 | if (sqr) | ||
| 183 | { | ||
| 184 | BN_sqr(&ahal,&al,ctx); | ||
| 185 | BN_sqr(&blbh,&ah,ctx); | ||
| 186 | } | ||
| 187 | else | ||
| 188 | { | ||
| 189 | BN_mul(&ahal,&al,&bl); | ||
| 190 | BN_mul(&blbh,&ah,&bh); | ||
| 191 | } | ||
| 192 | } | ||
| 193 | else | ||
| 194 | { | ||
| 195 | bn_mm(m,&ahal,&blbh,&(sk[2]),ctx); | ||
| 196 | bn_mm(&ahal,&al,&bl,&(sk[2]),ctx); | ||
| 197 | bn_mm(&blbh,&ah,&bh,&(sk[2]),ctx); | ||
| 198 | } | ||
| 199 | |||
| 200 | BN_add(m,m,&ahal); | ||
| 201 | BN_add(m,m,&blbh); | ||
| 202 | |||
| 203 | BN_lshift(m,m,n*BN_BITS2); | ||
| 204 | BN_lshift(&blbh,&blbh,n*BN_BITS2*2); | ||
| 205 | |||
| 206 | BN_add(m,m,&ahal); | ||
| 207 | BN_add(m,m,&blbh); | ||
| 208 | |||
| 209 | m->neg=A->neg^B->neg; | ||
| 210 | return(1); | ||
| 211 | } | ||
| 212 | #undef ahal (sk[0]) | ||
| 213 | #undef blbh (sk[1]) | ||
| 214 | |||
| 215 | #include "bn_low.c" | ||
| 216 | #include "bn_high.c" | ||
| 217 | #include "f.c" | ||
| 218 | |||
| 219 | #endif | ||
diff --git a/src/lib/libcrypto/bn/old/bn_wmul.c b/src/lib/libcrypto/bn/old/bn_wmul.c new file mode 100644 index 0000000000..a467b2f17a --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_wmul.c | |||
| @@ -0,0 +1,173 @@ | |||
| 1 | #include <stdio.h> | ||
| 2 | #include "bn_lcl.h" | ||
| 3 | |||
| 4 | #if 1 | ||
| 5 | |||
| 6 | int bn_mull(BIGNUM *r,BIGNUM *a,BIGNUM *b, BN_CTX *ctx); | ||
| 7 | |||
| 8 | int bn_mull(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx) | ||
| 9 | { | ||
| 10 | int top,i,j,k,al,bl; | ||
| 11 | BIGNUM *t; | ||
| 12 | |||
| 13 | #ifdef BN_COUNT | ||
| 14 | printf("bn_mull %d * %d\n",a->top,b->top); | ||
| 15 | #endif | ||
| 16 | |||
| 17 | bn_check_top(a); | ||
| 18 | bn_check_top(b); | ||
| 19 | bn_check_top(r); | ||
| 20 | |||
| 21 | al=a->top; | ||
| 22 | bl=b->top; | ||
| 23 | r->neg=a->neg^b->neg; | ||
| 24 | |||
| 25 | top=al+bl; | ||
| 26 | if ((al < 4) || (bl < 4)) | ||
| 27 | { | ||
| 28 | if (bn_wexpand(r,top) == NULL) return(0); | ||
| 29 | r->top=top; | ||
| 30 | bn_mul_normal(r->d,a->d,al,b->d,bl); | ||
| 31 | goto end; | ||
| 32 | } | ||
| 33 | else if (al == bl) /* A good start, they are the same size */ | ||
| 34 | goto symetric; | ||
| 35 | else | ||
| 36 | { | ||
| 37 | i=(al-bl); | ||
| 38 | if ((i == 1) && !BN_get_flags(b,BN_FLG_STATIC_DATA)) | ||
| 39 | { | ||
| 40 | bn_wexpand(b,al); | ||
| 41 | b->d[bl]=0; | ||
| 42 | bl++; | ||
| 43 | goto symetric; | ||
| 44 | } | ||
| 45 | else if ((i == -1) && !BN_get_flags(a,BN_FLG_STATIC_DATA)) | ||
| 46 | { | ||
| 47 | bn_wexpand(a,bl); | ||
| 48 | a->d[al]=0; | ||
| 49 | al++; | ||
| 50 | goto symetric; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | /* asymetric and >= 4 */ | ||
| 55 | if (bn_wexpand(r,top) == NULL) return(0); | ||
| 56 | r->top=top; | ||
| 57 | bn_mul_normal(r->d,a->d,al,b->d,bl); | ||
| 58 | |||
| 59 | if (0) | ||
| 60 | { | ||
| 61 | /* symetric and > 4 */ | ||
| 62 | symetric: | ||
| 63 | if (al == 4) | ||
| 64 | { | ||
| 65 | if (bn_wexpand(r,al*2) == NULL) return(0); | ||
| 66 | r->top=top; | ||
| 67 | bn_mul_comba4(r->d,a->d,b->d); | ||
| 68 | goto end; | ||
| 69 | } | ||
| 70 | if (al == 8) | ||
| 71 | { | ||
| 72 | if (bn_wexpand(r,al*2) == NULL) return(0); | ||
| 73 | r->top=top; | ||
| 74 | bn_mul_comba8(r->d,a->d,b->d); | ||
| 75 | goto end; | ||
| 76 | } | ||
| 77 | if (al <= BN_MULL_NORMAL_SIZE) | ||
| 78 | { | ||
| 79 | if (bn_wexpand(r,al*2) == NULL) return(0); | ||
| 80 | r->top=top; | ||
| 81 | bn_mul_normal(r->d,a->d,al,b->d,bl); | ||
| 82 | goto end; | ||
| 83 | } | ||
| 84 | /* 16 or larger */ | ||
| 85 | j=BN_num_bits_word((BN_ULONG)al); | ||
| 86 | j=1<<(j-1); | ||
| 87 | k=j+j; | ||
| 88 | t= &(ctx->bn[ctx->tos]); | ||
| 89 | if (al == j) /* exact multiple */ | ||
| 90 | { | ||
| 91 | bn_wexpand(t,k*2); | ||
| 92 | bn_wexpand(r,k*2); | ||
| 93 | bn_mul_recursive(r->d,a->d,b->d,al,t->d); | ||
| 94 | } | ||
| 95 | else | ||
| 96 | { | ||
| 97 | bn_wexpand(a,k); | ||
| 98 | bn_wexpand(b,k); | ||
| 99 | bn_wexpand(t,k*4); | ||
| 100 | bn_wexpand(r,k*4); | ||
| 101 | for (i=a->top; i<k; i++) | ||
| 102 | a->d[i]=0; | ||
| 103 | for (i=b->top; i<k; i++) | ||
| 104 | b->d[i]=0; | ||
| 105 | bn_mul_part_recursive(r->d,a->d,b->d,al-j,j,t->d); | ||
| 106 | } | ||
| 107 | r->top=top; | ||
| 108 | } | ||
| 109 | end: | ||
| 110 | bn_fix_top(r); | ||
| 111 | return(1); | ||
| 112 | } | ||
| 113 | #endif | ||
| 114 | |||
| 115 | void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) | ||
| 116 | { | ||
| 117 | BN_ULONG *rr; | ||
| 118 | |||
| 119 | #ifdef BN_COUNT | ||
| 120 | printf(" bn_mul_normal %d * %d\n",na,nb); | ||
| 121 | #endif | ||
| 122 | |||
| 123 | if (na < nb) | ||
| 124 | { | ||
| 125 | int itmp; | ||
| 126 | BN_ULONG *ltmp; | ||
| 127 | |||
| 128 | itmp=na; na=nb; nb=itmp; | ||
| 129 | ltmp=a; a=b; b=ltmp; | ||
| 130 | |||
| 131 | } | ||
| 132 | rr= &(r[na]); | ||
| 133 | rr[0]=bn_mul_words(r,a,na,b[0]); | ||
| 134 | |||
| 135 | for (;;) | ||
| 136 | { | ||
| 137 | if (--nb <= 0) return; | ||
| 138 | rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]); | ||
| 139 | if (--nb <= 0) return; | ||
| 140 | rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]); | ||
| 141 | if (--nb <= 0) return; | ||
| 142 | rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]); | ||
| 143 | if (--nb <= 0) return; | ||
| 144 | rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]); | ||
| 145 | rr+=4; | ||
| 146 | r+=4; | ||
| 147 | b+=4; | ||
| 148 | } | ||
| 149 | } | ||
| 150 | |||
| 151 | #if 1 | ||
| 152 | void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | ||
| 153 | { | ||
| 154 | #ifdef BN_COUNT | ||
| 155 | printf(" bn_mul_low_normal %d * %d\n",n,n); | ||
| 156 | #endif | ||
| 157 | bn_mul_words(r,a,n,b[0]); | ||
| 158 | |||
| 159 | for (;;) | ||
| 160 | { | ||
| 161 | if (--n <= 0) return; | ||
| 162 | bn_mul_add_words(&(r[1]),a,n,b[1]); | ||
| 163 | if (--n <= 0) return; | ||
| 164 | bn_mul_add_words(&(r[2]),a,n,b[2]); | ||
| 165 | if (--n <= 0) return; | ||
| 166 | bn_mul_add_words(&(r[3]),a,n,b[3]); | ||
| 167 | if (--n <= 0) return; | ||
| 168 | bn_mul_add_words(&(r[4]),a,n,b[4]); | ||
| 169 | r+=4; | ||
| 170 | b+=4; | ||
| 171 | } | ||
| 172 | } | ||
| 173 | #endif | ||
diff --git a/src/lib/libcrypto/bn/old/build b/src/lib/libcrypto/bn/old/build new file mode 100644 index 0000000000..8cd99e5f17 --- /dev/null +++ b/src/lib/libcrypto/bn/old/build | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | #!/bin/sh -x | ||
| 2 | |||
| 3 | gcc -g -I../../include test.c -L../.. -lcrypto | ||
diff --git a/src/lib/libcrypto/bn/old/info b/src/lib/libcrypto/bn/old/info new file mode 100644 index 0000000000..5ac99c3b23 --- /dev/null +++ b/src/lib/libcrypto/bn/old/info | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | Given A1A0 * B1B0 == S3S2S1S0 | ||
| 2 | |||
| 3 | S0= low(A0*B0) | ||
| 4 | S1= low( (A1-A0)*(B0-B1)) +low( A1*B1) +high(A0*B0) | ||
| 5 | S2= high((A1-A0)*(B0-B1)) +high(A1*B1) +low( A1*B1) | ||
| 6 | S3= high(A1*B1); | ||
| 7 | |||
| 8 | Assume we know S1 and S0, and can calculate A1*B1 and high((A1-A0)*(B0-B1)) | ||
| 9 | |||
| 10 | k0= S0 == low(A0*B0) | ||
| 11 | k1= S1 | ||
| 12 | k2= low( A1*B1) | ||
| 13 | k3= high(A1*B1) | ||
| 14 | k4= high((A1-A0)*(B0-B1)) | ||
| 15 | |||
| 16 | k1= low((A1-A0)*(B0-B1)) +k2 +high(A0*B0) | ||
| 17 | S2= k4 +k3 +k2 | ||
| 18 | S3= k3 | ||
| 19 | |||
| 20 | S1-k2= low((A1-A0)*(B0-B1)) +high(A0*B0) | ||
| 21 | |||
| 22 | We potentially have a carry or a borrow from S1 | ||
diff --git a/src/lib/libcrypto/bn/old/test.works b/src/lib/libcrypto/bn/old/test.works new file mode 100644 index 0000000000..127c7b415d --- /dev/null +++ b/src/lib/libcrypto/bn/old/test.works | |||
| @@ -0,0 +1,205 @@ | |||
| 1 | #include <stdio.h> | ||
| 2 | #include "cryptlib.h" | ||
| 3 | #include "bn_lcl.h" | ||
| 4 | |||
| 5 | #define SIZE 128 | ||
| 6 | |||
| 7 | #define BN_MONT_CTX_set bn_mcs | ||
| 8 | #define BN_from_montgomery bn_fm | ||
| 9 | #define BN_mod_mul_montgomery bn_mmm | ||
| 10 | #undef BN_to_montgomery | ||
| 11 | #define BN_to_montgomery(r,a,mont,ctx) bn_mmm(\ | ||
| 12 | r,a,(mont)->RR,(mont),ctx) | ||
| 13 | |||
| 14 | main() | ||
| 15 | { | ||
| 16 | BIGNUM prime,a,b,r,A,B,R; | ||
| 17 | BN_MONT_CTX *mont; | ||
| 18 | BN_CTX *ctx; | ||
| 19 | int i; | ||
| 20 | |||
| 21 | ctx=BN_CTX_new(); | ||
| 22 | BN_init(&prime); | ||
| 23 | BN_init(&a); BN_init(&b); BN_init(&r); | ||
| 24 | BN_init(&A); BN_init(&B); BN_init(&R); | ||
| 25 | |||
| 26 | BN_generate_prime(&prime,SIZE,0,NULL,NULL,NULL,NULL); | ||
| 27 | BN_rand(&A,SIZE,1,0); | ||
| 28 | BN_rand(&B,SIZE,1,0); | ||
| 29 | BN_mod(&A,&A,&prime,ctx); | ||
| 30 | BN_mod(&B,&B,&prime,ctx); | ||
| 31 | |||
| 32 | mont=BN_MONT_CTX_new(); | ||
| 33 | BN_MONT_CTX_set(mont,&prime,ctx); | ||
| 34 | |||
| 35 | BN_to_montgomery(&a,&A,mont,ctx); | ||
| 36 | BN_to_montgomery(&b,&B,mont,ctx); | ||
| 37 | |||
| 38 | BN_mul(&r,&a,&b); | ||
| 39 | BN_print_fp(stdout,&r); printf("\n"); | ||
| 40 | BN_from_montgomery(&r,&r,mont,ctx); | ||
| 41 | BN_print_fp(stdout,&r); printf("\n"); | ||
| 42 | BN_from_montgomery(&r,&r,mont,ctx); | ||
| 43 | BN_print_fp(stdout,&r); printf("\n"); | ||
| 44 | |||
| 45 | BN_mod_mul(&R,&A,&B,&prime,ctx); | ||
| 46 | |||
| 47 | BN_print_fp(stdout,&a); printf("\n"); | ||
| 48 | BN_print_fp(stdout,&b); printf("\n"); | ||
| 49 | BN_print_fp(stdout,&prime); printf("\n"); | ||
| 50 | BN_print_fp(stdout,&r); printf("\n\n"); | ||
| 51 | |||
| 52 | BN_print_fp(stdout,&A); printf("\n"); | ||
| 53 | BN_print_fp(stdout,&B); printf("\n"); | ||
| 54 | BN_print_fp(stdout,&prime); printf("\n"); | ||
| 55 | BN_print_fp(stdout,&R); printf("\n\n"); | ||
| 56 | |||
| 57 | BN_mul(&r,&a,&b); | ||
| 58 | BN_print_fp(stdout,&r); printf(" <- BA*DC\n"); | ||
| 59 | BN_copy(&A,&r); | ||
| 60 | i=SIZE/2; | ||
| 61 | BN_mask_bits(&A,i*2); | ||
| 62 | // BN_print_fp(stdout,&A); printf(" <- low(BA*DC)\n"); | ||
| 63 | bn_do_lower(&r,&a,&b,&A,i); | ||
| 64 | // BN_print_fp(stdout,&r); printf(" <- low(BA*DC)\n"); | ||
| 65 | } | ||
| 66 | |||
| 67 | int bn_mul_low(r,a,b,low,i) | ||
| 68 | BIGNUM *r,*a,*b,*low; | ||
| 69 | int i; | ||
| 70 | { | ||
| 71 | int w; | ||
| 72 | BIGNUM Kh,Km,t1,t2,h,ah,al,bh,bl,l,m,s0,s1; | ||
| 73 | |||
| 74 | BN_init(&Kh); BN_init(&Km); BN_init(&t1); BN_init(&t2); BN_init(&l); | ||
| 75 | BN_init(&ah); BN_init(&al); BN_init(&bh); BN_init(&bl); BN_init(&h); | ||
| 76 | BN_init(&m); BN_init(&s0); BN_init(&s1); | ||
| 77 | |||
| 78 | BN_copy(&al,a); BN_mask_bits(&al,i); BN_rshift(&ah,a,i); | ||
| 79 | BN_copy(&bl,b); BN_mask_bits(&bl,i); BN_rshift(&bh,b,i); | ||
| 80 | |||
| 81 | |||
| 82 | BN_sub(&t1,&al,&ah); | ||
| 83 | BN_sub(&t2,&bh,&bl); | ||
| 84 | BN_mul(&m,&t1,&t2); | ||
| 85 | BN_mul(&h,&ah,&bh); | ||
| 86 | |||
| 87 | BN_copy(&s0,low); BN_mask_bits(&s0,i); | ||
| 88 | BN_rshift(&s1,low,i); | ||
| 89 | |||
| 90 | BN_add(&t1,&h,&m); | ||
| 91 | BN_add(&t1,&t1,&s0); | ||
| 92 | |||
| 93 | BN_copy(&t2,&t1); BN_mask_bits(&t2,i); | ||
| 94 | BN_sub(&t1,&s1,&t2); | ||
| 95 | BN_lshift(&t1,&t1,i); | ||
| 96 | BN_add(&t1,&t1,&s0); | ||
| 97 | if (t1.neg) | ||
| 98 | { | ||
| 99 | BN_lshift(&t2,BN_value_one(),i*2); | ||
| 100 | BN_add(&t1,&t2,&t1); | ||
| 101 | BN_mask_bits(&t1,i*2); | ||
| 102 | } | ||
| 103 | |||
| 104 | BN_free(&Kh); BN_free(&Km); BN_free(&t1); BN_free(&t2); | ||
| 105 | BN_free(&ah); BN_free(&al); BN_free(&bh); BN_free(&bl); | ||
| 106 | } | ||
| 107 | |||
| 108 | int BN_mod_mul_montgomery(r,a,b,mont,ctx) | ||
| 109 | BIGNUM *r,*a,*b; | ||
| 110 | BN_MONT_CTX *mont; | ||
| 111 | BN_CTX *ctx; | ||
| 112 | { | ||
| 113 | BIGNUM *tmp; | ||
| 114 | |||
| 115 | tmp= &(ctx->bn[ctx->tos++]); | ||
| 116 | |||
| 117 | if (a == b) | ||
| 118 | { | ||
| 119 | if (!BN_sqr(tmp,a,ctx)) goto err; | ||
| 120 | } | ||
| 121 | else | ||
| 122 | { | ||
| 123 | if (!BN_mul(tmp,a,b)) goto err; | ||
| 124 | } | ||
| 125 | /* reduce from aRR to aR */ | ||
| 126 | if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; | ||
| 127 | ctx->tos--; | ||
| 128 | return(1); | ||
| 129 | err: | ||
| 130 | return(0); | ||
| 131 | } | ||
| 132 | |||
| 133 | int BN_from_montgomery(r,a,mont,ctx) | ||
| 134 | BIGNUM *r; | ||
| 135 | BIGNUM *a; | ||
| 136 | BN_MONT_CTX *mont; | ||
| 137 | BN_CTX *ctx; | ||
| 138 | { | ||
| 139 | BIGNUM z1; | ||
| 140 | BIGNUM *t1,*t2; | ||
| 141 | BN_ULONG *ap,*bp,*rp; | ||
| 142 | int j,i,bl,al; | ||
| 143 | |||
| 144 | BN_init(&z1); | ||
| 145 | t1= &(ctx->bn[ctx->tos]); | ||
| 146 | t2= &(ctx->bn[ctx->tos+1]); | ||
| 147 | |||
| 148 | if (!BN_copy(t1,a)) goto err; | ||
| 149 | /* can cheat */ | ||
| 150 | BN_mask_bits(t1,mont->ri); | ||
| 151 | if (!BN_mul(t2,t1,mont->Ni)) goto err; | ||
| 152 | BN_mask_bits(t2,mont->ri); | ||
| 153 | |||
| 154 | if (!BN_mul(t1,t2,mont->N)) goto err; | ||
| 155 | if (!BN_add(t2,t1,a)) goto err; | ||
| 156 | |||
| 157 | /* At this point, t2 has the bottom ri bits set to zero. | ||
| 158 | * This means that the bottom ri bits == the 1^ri minus the bottom | ||
| 159 | * ri bits of a. | ||
| 160 | * This means that only the bits above 'ri' in a need to be added, | ||
| 161 | * and XXXXXXXXXXXXXXXXXXXXXXXX | ||
| 162 | */ | ||
| 163 | BN_print_fp(stdout,t2); printf("\n"); | ||
| 164 | BN_rshift(r,t2,mont->ri); | ||
| 165 | |||
| 166 | if (BN_ucmp(r,mont->N) >= 0) | ||
| 167 | bn_qsub(r,r,mont->N); | ||
| 168 | |||
| 169 | return(1); | ||
| 170 | err: | ||
| 171 | return(0); | ||
| 172 | } | ||
| 173 | |||
| 174 | int BN_MONT_CTX_set(mont,mod,ctx) | ||
| 175 | BN_MONT_CTX *mont; | ||
| 176 | BIGNUM *mod; | ||
| 177 | BN_CTX *ctx; | ||
| 178 | { | ||
| 179 | BIGNUM *Ri=NULL,*R=NULL; | ||
| 180 | |||
| 181 | if (mont->RR == NULL) mont->RR=BN_new(); | ||
| 182 | if (mont->N == NULL) mont->N=BN_new(); | ||
| 183 | |||
| 184 | R=mont->RR; /* grab RR as a temp */ | ||
| 185 | BN_copy(mont->N,mod); /* Set N */ | ||
| 186 | |||
| 187 | mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; | ||
| 188 | BN_lshift(R,BN_value_one(),mont->ri); /* R */ | ||
| 189 | if ((Ri=BN_mod_inverse(NULL,R,mod,ctx)) == NULL) goto err;/* Ri */ | ||
| 190 | BN_lshift(Ri,Ri,mont->ri); /* R*Ri */ | ||
| 191 | bn_qsub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */ | ||
| 192 | BN_div(Ri,NULL,Ri,mod,ctx); | ||
| 193 | if (mont->Ni != NULL) BN_free(mont->Ni); | ||
| 194 | mont->Ni=Ri; /* Ni=(R*Ri-1)/N */ | ||
| 195 | |||
| 196 | /* setup RR for conversions */ | ||
| 197 | BN_lshift(mont->RR,BN_value_one(),mont->ri*2); | ||
| 198 | BN_mod(mont->RR,mont->RR,mont->N,ctx); | ||
| 199 | |||
| 200 | return(1); | ||
| 201 | err: | ||
| 202 | return(0); | ||
| 203 | } | ||
| 204 | |||
| 205 | |||
