diff options
| author | cvs2svn <admin@example.com> | 2015-03-08 16:48:48 +0000 |
|---|---|---|
| committer | cvs2svn <admin@example.com> | 2015-03-08 16:48:48 +0000 |
| commit | da1a9ad3a4a867ba6569c05e6fca66d7f296c553 (patch) | |
| tree | 44872802e872bdfd60730fa9cf01d9d5751251c1 /src/lib/libcrypto/bn/asm/alpha-mont.pl | |
| parent | 973703db67a8e73d70e63afa8f2cde19da09144d (diff) | |
| download | openbsd-OPENBSD_5_7_BASE.tar.gz openbsd-OPENBSD_5_7_BASE.tar.bz2 openbsd-OPENBSD_5_7_BASE.zip | |
This commit was manufactured by cvs2git to create tag 'OPENBSD_5_7_BASE'. Tag: OPENBSD_5_7_BASE
Diffstat (limited to 'src/lib/libcrypto/bn/asm/alpha-mont.pl')
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha-mont.pl | 316 |
1 files changed, 0 insertions, 316 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha-mont.pl b/src/lib/libcrypto/bn/asm/alpha-mont.pl deleted file mode 100644 index 41700d5bd5..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha-mont.pl +++ /dev/null | |||
| @@ -1,316 +0,0 @@ | |||
| 1 | #!/usr/bin/env perl | ||
| 2 | # | ||
| 3 | # ==================================================================== | ||
| 4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
| 5 | # project. The module is, however, dual licensed under OpenSSL and | ||
| 6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
| 7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
| 8 | # ==================================================================== | ||
| 9 | # | ||
| 10 | # On 21264 RSA sign performance improves by 70/35/20/15 percent for | ||
| 11 | # 512/1024/2048/4096 bit key lengths. This is against vendor compiler | ||
| 12 | # instructed to '-tune host' code with in-line assembler. Other | ||
| 13 | # benchmarks improve by 15-20%. To anchor it to something else, the | ||
| 14 | # code provides approximately the same performance per GHz as AMD64. | ||
| 15 | # I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x | ||
| 16 | # difference. | ||
| 17 | |||
| 18 | # int bn_mul_mont( | ||
| 19 | $rp="a0"; # BN_ULONG *rp, (result vector, written by .Lsub and .Lcopy) | ||
| 20 | $ap="a1"; # const BN_ULONG *ap, (first multiplicand, indexed by j) | ||
| 21 | $bp="a2"; # const BN_ULONG *bp, (second multiplicand, indexed by i) | ||
| 22 | $np="a3"; # const BN_ULONG *np, (modulus words, indexed by j) | ||
| 23 | $n0="a4"; # const BN_ULONG *n0, (dereferenced once; the register then holds the word) | ||
| 24 | $num="a5"; # int num); (word count; the routine returns 0 when num < 4) | ||
| 25 | |||
| 26 | $lo0="t0"; # low word of the ap[j]*bp[i] accumulation | ||
| 27 | $hi0="t1"; # high word / carry of the ap[j]*bp[i] accumulation | ||
| 28 | $lo1="t2"; # low word of the np[j]*m1 accumulation | ||
| 29 | $hi1="t3"; # high word / carry of the np[j]*m1 accumulation | ||
| 30 | $aj="t4"; # first the address of ap[j], then the loaded ap[j] | ||
| 31 | $bi="t5"; # first the address of bp[i], then the loaded bp[i] | ||
| 32 | $nj="t6"; # first the address of np[j], then the loaded np[j] | ||
| 33 | $tp="t7"; # cursor into the temporary vector kept on the stack | ||
| 34 | $alo="t8"; # mulq result (low half) of ap[j]*bp[i] | ||
| 35 | $ahi="t9"; # umulh result (high half) of ap[j]*bp[i] | ||
| 36 | $nlo="t10"; # mulq result (low half) of np[j]*m1 | ||
| 37 | $nhi="t11"; # umulh result (high half) of np[j]*m1 | ||
| 38 | $tj="t12"; # loaded tp word; also reused as loop flag and as end pointer &tp[num] | ||
| 39 | $i="s3"; # outer loop index (s3 is saved and restored by the generated code) | ||
| 40 | $j="s4"; # inner loop index, also used once as scratch (s4 is saved and restored) | ||
| 41 | $m1="s5"; # per-pass multiplier lo0*n0 (s5 is saved and restored) | ||
| 42 | |||
| 43 | $code=<<___; # interpolating heredoc: the register variables above are expanded into the assembly text | ||
| 44 | #include <machine/asm.h> | ||
| 45 | |||
| 46 | .text | ||
| 47 | |||
| 48 | .set noat # AT is used explicitly as a scratch register below | ||
| 49 | .set noreorder # keep the prologue in the written order | ||
| 50 | |||
| 51 | .globl bn_mul_mont | ||
| 52 | .align 5 | ||
| 53 | .ent bn_mul_mont | ||
| 54 | bn_mul_mont: | ||
| 55 | lda sp,-48(sp) # allocate the 48-byte register save frame | ||
| 56 | stq ra,0(sp) # save return address | ||
| 57 | stq s3,8(sp) # save s3 (used as i) | ||
| 58 | stq s4,16(sp) # save s4 (used as j) | ||
| 59 | stq s5,24(sp) # save s5 (used as m1) | ||
| 60 | stq fp,32(sp) # save caller frame pointer | ||
| 61 | mov sp,fp # fp remembers the frame base; sp is lowered again below | ||
| 62 | .mask 0x0400f000,-48 # bits 26 and 12-15: ra, s3-s5 and fp live in the frame | ||
| 63 | .frame fp,48,ra | ||
| 64 | .prologue 0 | ||
| 65 | |||
| 66 | .align 4 | ||
| 67 | .set reorder | ||
| 68 | sextl $num,$num # sign-extend the 32-bit int argument | ||
| 69 | mov 0,v0 # preset return value 0 | ||
| 70 | cmplt $num,4,AT # AT = (num < 4) | ||
| 71 | bne AT,.Lexit # return 0 without computing when num < 4 | ||
| 72 | |||
| 73 | ldq $hi0,0($ap) # ap[0] | ||
| 74 | s8addq $num,16,AT # AT = 8*num+16, byte size of the temporary vector | ||
| 75 | ldq $aj,8($ap) # ap[1] | ||
| 76 | subq sp,AT,sp # reserve the temporary vector below the frame | ||
| 77 | ldq $bi,0($bp) # bp[0] | ||
| 78 | lda AT,-4096(zero) # mov -4096,AT | ||
| 79 | ldq $n0,0($n0) # replace the n0 pointer by the word it points to | ||
| 80 | and sp,AT,sp # round sp down to a multiple of 4096 | ||
| 81 | |||
| 82 | mulq $hi0,$bi,$lo0 # low word of ap[0]*bp[0] | ||
| 83 | ldq $hi1,0($np) # np[0] | ||
| 84 | umulh $hi0,$bi,$hi0 # high word of ap[0]*bp[0] | ||
| 85 | ldq $nj,8($np) # np[1] | ||
| 86 | |||
| 87 | mulq $lo0,$n0,$m1 # m1 = low 64 bits of lo0*n0 | ||
| 88 | |||
| 89 | mulq $hi1,$m1,$lo1 # low word of np[0]*m1 | ||
| 90 | umulh $hi1,$m1,$hi1 # high word of np[0]*m1 | ||
| 91 | |||
| 92 | addq $lo1,$lo0,$lo1 # add the two low words | ||
| 93 | cmpult $lo1,$lo0,AT # AT = carry out of that sum | ||
| 94 | addq $hi1,AT,$hi1 # keep only the carry; the low sum itself is never stored | ||
| 95 | |||
| 96 | mulq $aj,$bi,$alo # low word of ap[1]*bp[0] | ||
| 97 | mov 2,$j # j = 2 | ||
| 98 | umulh $aj,$bi,$ahi # high word of ap[1]*bp[0] | ||
| 99 | mov sp,$tp # tp cursor starts at the base of the temporary vector | ||
| 100 | |||
| 101 | mulq $nj,$m1,$nlo # low word of np[1]*m1 | ||
| 102 | s8addq $j,$ap,$aj # address of ap[j] | ||
| 103 | umulh $nj,$m1,$nhi # high word of np[1]*m1 | ||
| 104 | s8addq $j,$np,$nj # address of np[j] | ||
| 105 | .align 4 | ||
| 106 | .L1st: | ||
| 107 | .set noreorder # loop body is hand scheduled | ||
| 108 | ldq $aj,0($aj) # load ap[j] | ||
| 109 | addl $j,1,$j # j++ | ||
| 110 | ldq $nj,0($nj) # load np[j] | ||
| 111 | lda $tp,8($tp) # advance the tp cursor by one word | ||
| 112 | |||
| 113 | addq $alo,$hi0,$lo0 # lo0 = previous a-product low word + running high word | ||
| 114 | mulq $aj,$bi,$alo # start the next a-product (low half) | ||
| 115 | cmpult $lo0,$hi0,AT # AT = carry out of lo0 | ||
| 116 | addq $nlo,$hi1,$lo1 # lo1 = previous n-product low word + running high word | ||
| 117 | |||
| 118 | mulq $nj,$m1,$nlo # start the next n-product (low half) | ||
| 119 | addq $ahi,AT,$hi0 # hi0 = previous a-product high word + carry | ||
| 120 | cmpult $lo1,$hi1,v0 # v0 = carry out of lo1 | ||
| 121 | cmplt $j,$num,$tj # tj = (j < num), tested by the branch at the bottom | ||
| 122 | |||
| 123 | umulh $aj,$bi,$ahi # next a-product (high half) | ||
| 124 | addq $nhi,v0,$hi1 # hi1 = previous n-product high word + carry | ||
| 125 | addq $lo1,$lo0,$lo1 # combine both low words | ||
| 126 | s8addq $j,$ap,$aj # address of ap[j] for the next pass | ||
| 127 | |||
| 128 | umulh $nj,$m1,$nhi # next n-product (high half) | ||
| 129 | cmpult $lo1,$lo0,v0 # v0 = carry out of the combined sum | ||
| 130 | addq $hi1,v0,$hi1 # fold that carry into hi1 | ||
| 131 | s8addq $j,$np,$nj # address of np[j] for the next pass | ||
| 132 | |||
| 133 | stq $lo1,-8($tp) # store the result word in the slot just passed | ||
| 134 | nop # filler | ||
| 135 | unop # filler | ||
| 136 | bne $tj,.L1st # loop while j < num | ||
| 137 | .set reorder | ||
| 138 | |||
| 139 | addq $alo,$hi0,$lo0 # drain the products still in flight: same steps, no new loads | ||
| 140 | addq $nlo,$hi1,$lo1 | ||
| 141 | cmpult $lo0,$hi0,AT # AT = carry out of lo0 | ||
| 142 | cmpult $lo1,$hi1,v0 # v0 = carry out of lo1 | ||
| 143 | addq $ahi,AT,$hi0 | ||
| 144 | addq $nhi,v0,$hi1 | ||
| 145 | |||
| 146 | addq $lo1,$lo0,$lo1 # combine both low words | ||
| 147 | cmpult $lo1,$lo0,v0 # v0 = carry out | ||
| 148 | addq $hi1,v0,$hi1 | ||
| 149 | |||
| 150 | stq $lo1,0($tp) # store the last low word | ||
| 151 | |||
| 152 | addq $hi1,$hi0,$hi1 # sum of the two running high words | ||
| 153 | cmpult $hi1,$hi0,AT # AT = carry out of that sum | ||
| 154 | stq $hi1,8($tp) # store the top word | ||
| 155 | stq AT,16($tp) # store the final carry in the word above it | ||
| 156 | |||
| 157 | mov 1,$i # i = 1: remaining words of bp are handled by .Louter | ||
| 158 | .align 4 | ||
| 159 | .Louter: | ||
| 160 | s8addq $i,$bp,$bi # address of bp[i] | ||
| 161 | ldq $hi0,0($ap) # ap[0] | ||
| 162 | ldq $aj,8($ap) # ap[1] | ||
| 163 | ldq $bi,0($bi) # bp[i] | ||
| 164 | ldq $hi1,0($np) # np[0] | ||
| 165 | ldq $nj,8($np) # np[1] | ||
| 166 | ldq $tj,0(sp) # tp[0] | ||
| 167 | |||
| 168 | mulq $hi0,$bi,$lo0 # low word of ap[0]*bp[i] | ||
| 169 | umulh $hi0,$bi,$hi0 # high word of ap[0]*bp[i] | ||
| 170 | |||
| 171 | addq $lo0,$tj,$lo0 # add tp[0] | ||
| 172 | cmpult $lo0,$tj,AT # AT = carry out | ||
| 173 | addq $hi0,AT,$hi0 # fold the carry into the high word | ||
| 174 | |||
| 175 | mulq $lo0,$n0,$m1 # m1 = low 64 bits of lo0*n0 | ||
| 176 | |||
| 177 | mulq $hi1,$m1,$lo1 # low word of np[0]*m1 | ||
| 178 | umulh $hi1,$m1,$hi1 # high word of np[0]*m1 | ||
| 179 | |||
| 180 | addq $lo1,$lo0,$lo1 # add the two low words | ||
| 181 | cmpult $lo1,$lo0,AT # AT = carry out | ||
| 182 | mov 2,$j # j = 2 | ||
| 183 | addq $hi1,AT,$hi1 # keep only the carry; the low sum itself is never stored | ||
| 184 | |||
| 185 | mulq $aj,$bi,$alo # low word of ap[1]*bp[i] | ||
| 186 | mov sp,$tp # rewind the tp cursor to the base of the temporary vector | ||
| 187 | umulh $aj,$bi,$ahi # high word of ap[1]*bp[i] | ||
| 188 | |||
| 189 | mulq $nj,$m1,$nlo # low word of np[1]*m1 | ||
| 190 | s8addq $j,$ap,$aj # address of ap[j] | ||
| 191 | umulh $nj,$m1,$nhi # high word of np[1]*m1 | ||
| 192 | .align 4 | ||
| 193 | .Linner: | ||
| 194 | .set noreorder # loop body is hand scheduled; trailing tags are issue-slot notes | ||
| 195 | ldq $tj,8($tp) #L0 tp word one slot above the cursor | ||
| 196 | nop #U1 | ||
| 197 | ldq $aj,0($aj) #L1 load ap[j] | ||
| 198 | s8addq $j,$np,$nj #U0 address of np[j] | ||
| 199 | |||
| 200 | ldq $nj,0($nj) #L0 load np[j] | ||
| 201 | nop #U1 | ||
| 202 | addq $alo,$hi0,$lo0 #L1 lo0 = previous a-product low word + running high word | ||
| 203 | lda $tp,8($tp) # advance the tp cursor by one word | ||
| 204 | |||
| 205 | mulq $aj,$bi,$alo #U1 start the next a-product (low half) | ||
| 206 | cmpult $lo0,$hi0,AT #L0 AT = carry out of lo0 | ||
| 207 | addq $nlo,$hi1,$lo1 #L1 lo1 = previous n-product low word + running high word | ||
| 208 | addl $j,1,$j # j++ | ||
| 209 | |||
| 210 | mulq $nj,$m1,$nlo #U1 start the next n-product (low half) | ||
| 211 | addq $ahi,AT,$hi0 #L0 hi0 = previous a-product high word + carry | ||
| 212 | addq $lo0,$tj,$lo0 #L1 add the tp word loaded above | ||
| 213 | cmpult $lo1,$hi1,v0 #U0 v0 = carry out of lo1 | ||
| 214 | |||
| 215 | umulh $aj,$bi,$ahi #U1 next a-product (high half) | ||
| 216 | cmpult $lo0,$tj,AT #L0 AT = carry out of the tp addition | ||
| 217 | addq $lo1,$lo0,$lo1 #L1 combine both low words | ||
| 218 | addq $nhi,v0,$hi1 #U0 hi1 = previous n-product high word + carry | ||
| 219 | |||
| 220 | umulh $nj,$m1,$nhi #U1 next n-product (high half) | ||
| 221 | s8addq $j,$ap,$aj #L0 address of ap[j] for the next pass | ||
| 222 | cmpult $lo1,$lo0,v0 #L1 v0 = carry out of the combined sum | ||
| 223 | cmplt $j,$num,$tj #U0 # borrow $tj | ||
| 224 | |||
| 225 | addq $hi0,AT,$hi0 #L0 fold the tp carry into hi0 | ||
| 226 | addq $hi1,v0,$hi1 #U1 fold the combined-sum carry into hi1 | ||
| 227 | stq $lo1,-8($tp) #L1 store one slot below the tp word that was read | ||
| 228 | bne $tj,.Linner #U0 loop while j < num | ||
| 229 | .set reorder | ||
| 230 | |||
| 231 | ldq $tj,8($tp) # tp word for the products still in flight | ||
| 232 | addq $alo,$hi0,$lo0 # drain the last products: same steps, no new multiplies | ||
| 233 | addq $nlo,$hi1,$lo1 | ||
| 234 | cmpult $lo0,$hi0,AT # AT = carry out of lo0 | ||
| 235 | cmpult $lo1,$hi1,v0 # v0 = carry out of lo1 | ||
| 236 | addq $ahi,AT,$hi0 | ||
| 237 | addq $nhi,v0,$hi1 | ||
| 238 | |||
| 239 | addq $lo0,$tj,$lo0 # add the tp word | ||
| 240 | cmpult $lo0,$tj,AT # AT = carry out | ||
| 241 | addq $hi0,AT,$hi0 | ||
| 242 | |||
| 243 | ldq $tj,16($tp) # carry word stored by the previous pass | ||
| 244 | addq $lo1,$lo0,$j # j is free now and holds the last low sum | ||
| 245 | cmpult $j,$lo0,v0 # v0 = carry out of that sum | ||
| 246 | addq $hi1,v0,$hi1 | ||
| 247 | |||
| 248 | addq $hi1,$hi0,$lo1 # sum of the two running high words | ||
| 249 | stq $j,0($tp) # store the last low word | ||
| 250 | cmpult $lo1,$hi0,$hi1 # hi1 = carry out of the high-word sum | ||
| 251 | addq $lo1,$tj,$lo1 # add the carry word of the previous pass | ||
| 252 | cmpult $lo1,$tj,AT # AT = carry out of that addition | ||
| 253 | addl $i,1,$i # i++ | ||
| 254 | addq $hi1,AT,$hi1 # merge both carries into the new top carry | ||
| 255 | stq $lo1,8($tp) # store the top word | ||
| 256 | cmplt $i,$num,$tj # borrow $tj | ||
| 257 | stq $hi1,16($tp) # store the carry word above it | ||
| 258 | bne $tj,.Louter # loop while i < num | ||
| 259 | |||
| 260 | s8addq $num,sp,$tj # &tp[num] | ||
| 261 | mov $rp,$bp # put rp aside | ||
| 262 | mov sp,$tp # tp cursor back to the base of the temporary vector | ||
| 263 | mov sp,$ap # NOTE(review): ap is overwritten before .Lcopy reads it, confirm this move is needed | ||
| 264 | mov 0,$hi0 # clear borrow bit | ||
| 265 | |||
| 266 | .align 4 | ||
| 267 | .Lsub: ldq $lo0,0($tp) # tp[i] | ||
| 268 | ldq $lo1,0($np) # np[i] | ||
| 269 | lda $tp,8($tp) # advance tp | ||
| 270 | lda $np,8($np) # advance np | ||
| 271 | subq $lo0,$lo1,$lo1 # tp[i]-np[i] | ||
| 272 | cmpult $lo0,$lo1,AT # AT = borrow out of tp[i]-np[i] | ||
| 273 | subq $lo1,$hi0,$lo0 # subtract the incoming borrow | ||
| 274 | cmpult $lo1,$lo0,$hi0 # borrow out of that second subtraction | ||
| 275 | or $hi0,AT,$hi0 # outgoing borrow = either of the two | ||
| 276 | stq $lo0,0($rp) # rp[i] = difference word | ||
| 277 | cmpult $tp,$tj,v0 # v0 = (tp cursor below &tp[num]) | ||
| 278 | lda $rp,8($rp) # advance rp | ||
| 279 | bne v0,.Lsub # loop over all num words | ||
| 280 | |||
| 281 | subq $hi1,$hi0,$hi0 # handle upmost overflow bit | ||
| 282 | mov sp,$tp # tp cursor back to the base of the temporary vector | ||
| 283 | mov $bp,$rp # restore rp | ||
| 284 | |||
| 285 | and sp,$hi0,$ap # tp address kept where hi0 bits are set | ||
| 286 | bic $bp,$hi0,$bp # rp address kept where hi0 bits are clear | ||
| 287 | bis $bp,$ap,$ap # ap=borrow?tp:rp | ||
| 288 | |||
| 289 | .align 4 | ||
| 290 | .Lcopy: ldq $aj,0($ap) # copy or in-place refresh | ||
| 291 | lda $tp,8($tp) # advance tp | ||
| 292 | lda $rp,8($rp) # advance rp | ||
| 293 | lda $ap,8($ap) # advance the copy source | ||
| 294 | stq zero,-8($tp) # zap tp | ||
| 295 | cmpult $tp,$tj,AT # AT = (tp cursor below &tp[num]) | ||
| 296 | stq $aj,-8($rp) # write the selected word to rp | ||
| 297 | bne AT,.Lcopy # loop over all num words | ||
| 298 | mov 1,v0 # return value 1 on the computing path | ||
| 299 | |||
| 300 | .Lexit: | ||
| 301 | .set noreorder | ||
| 302 | mov fp,sp # drop the temporary vector; sp is the 48-byte frame base again | ||
| 303 | /*ldq ra,0(sp)*/ | ||
| 304 | ldq s3,8(sp) # restore s3 | ||
| 305 | ldq s4,16(sp) # restore s4 | ||
| 306 | ldq s5,24(sp) # restore s5 | ||
| 307 | ldq fp,32(sp) # restore caller frame pointer | ||
| 308 | lda sp,48(sp) # release the frame | ||
| 309 | ret (ra) # return with v0 = 0 or 1 | ||
| 310 | .end bn_mul_mont | ||
| 311 | .ascii "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>" | ||
| 312 | .align 2 | ||
| 313 | ___ | ||
| 314 | |||
| 315 | print $code; # write the generated assembly text to the default output handle | ||
| 316 | close STDOUT; # close STDOUT once the assembly has been printed | ||
