diff options
Diffstat (limited to 'src/lib/libcrypto/rc4/asm/rc4-parisc.pl')
| -rw-r--r-- | src/lib/libcrypto/rc4/asm/rc4-parisc.pl | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/src/lib/libcrypto/rc4/asm/rc4-parisc.pl b/src/lib/libcrypto/rc4/asm/rc4-parisc.pl new file mode 100644 index 0000000000..9165067080 --- /dev/null +++ b/src/lib/libcrypto/rc4/asm/rc4-parisc.pl | |||
| @@ -0,0 +1,313 @@ | |||
| 1 | #!/usr/bin/env perl | ||
| 2 | |||
| 3 | # ==================================================================== | ||
| 4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
| 5 | # project. The module is, however, dual licensed under OpenSSL and | ||
| 6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
| 7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
| 8 | # ==================================================================== | ||
| 9 | |||
| 10 | # RC4 for PA-RISC. | ||
| 11 | |||
| 12 | # June 2009. | ||
| 13 | # | ||
| 14 | # Performance is 33% better than gcc 3.2 generated code on PA-7100LC. | ||
| 15 | # For reference, [4x] unrolled loop is >40% faster than folded one. | ||
| 16 | # It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement | ||
| 17 | # is believed to be not sufficient to justify the effort... | ||
| 18 | # | ||
| 19 | # Special thanks to polarhome.com for providing HP-UX account. | ||
| 20 | |||
# Directory part of this script's path (including the trailing separator).
# It is used below to locate opensslconf.h relative to the script.  Falls back
# to the empty string when $0 carries no directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Command line: <flavour> <output file>
$flavour = shift;
$output  = shift;

# Send everything printed from here on to the requested output file.  A failed
# open would leave STDOUT closed and silently drop the generated code, so it is
# treated as fatal.  Without an output argument STDOUT is left as it is.
if (defined $output && length $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# ABI parameters: any flavour containing "64" selects the 2.0W settings,
# everything else the 1.0 settings.
if (defined $flavour && $flavour =~ /64/) {
    $LEVEL        = "2.0W";
    $SIZE_T       = 8;
    $FRAME_MARKER = 80;
    $SAVED_RP     = 16;
    $PUSH         = "std";
    $PUSHMA       = "std,ma";
    $POP          = "ldd";
    $POPMB        = "ldd,mb";
} else {
    $LEVEL        = "1.0";
    $SIZE_T       = 4;
    $FRAME_MARKER = 48;
    $SAVED_RP     = 20;
    $PUSH         = "stw";
    $PUSHMA       = "stwm";
    $POP          = "ldw";
    $POPMB        = "ldwm";
}

$FRAME = 4*$SIZE_T + $FRAME_MARKER;    # 4 saved regs + frame marker
                                       #                 [+ argument transfer]

# Size in bytes of one key-schedule element.  opensslconf.h is consulted on a
# best-effort basis: if it cannot be opened, or has no RC4_INT definition, the
# RC4_CHAR default stays in effect.
$SZ = 1;                               # defaults to RC4_CHAR
if (open my $conf, '<', "${dir}../../opensslconf.h") {
    while (my $line = <$conf>) {
        if ($line =~ m/#\s*define\s+RC4_INT\s+(.*)/) {
            $SZ = ($1 =~ /char$/) ? 1 : 4;
            last;
        }
    }
    close $conf;
}

# Load/store mnemonics matching the element size chosen above.
if ($SZ == 1) {                        # RC4_CHAR
    $LD  = "ldb";
    $LDX = "ldbx";
    $MKX = "addl";
    $ST  = "stb";
} else {                               # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
    $LD  = "ldw";
    $LDX = "ldwx,s";
    $MKX = "sh2addl";
    $ST  = "stw";
}

# Argument registers of the generated routines.
$key = "%r26";
$len = "%r25";
$inp = "%r24";
$out = "%r23";

# Working registers.  @XX and @TX are register pairs that unrolledloopbody
# rotates after every round.
@XX = ("%r19", "%r20");
@TX = ("%r21", "%r22");
$YY = "%r28";
$TY = "%r29";

$acc  = "%r1";
$ix   = "%r2";
$iy   = "%r3";
$dat0 = "%r4";
$dat1 = "%r5";
$rem  = "%r6";
$mask = "%r31";
| 89 | |||
# Append four consecutive rounds of the inner loop to $code.
#
# Each round is one instance of the template below.  The two `sprintf(...)`
# snippets are expanded later, when the backtick expressions in $code are
# eval'ed; they produce an instruction only for rounds 1..3 ($i>0) and an
# empty string for round 0.  The second snippet emits "zdep" in round 1 and
# "dep" in rounds 2 and 3, depositing 8 bits of $dat1 into $acc at bit
# position 8*($i-1)+7.
#
# Takes no arguments.  Reads the register-name globals, appends to $code, and
# rotates @XX and @TX once per round.  $i is deliberately the package
# variable, because the template interpolates it.
sub unrolledloopbody {
    for ($i = 0; $i < 4; $i++) {
        $code .= <<___;
	ldo	1($XX[0]),$XX[1]
	`sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
	and	$mask,$XX[1],$XX[1]
	$LDX	$YY($key),$TY
	$MKX	$YY,$key,$ix
	$LDX	$XX[1]($key),$TX[1]
	$MKX	$XX[0],$key,$iy
	$ST	$TX[0],0($ix)
	comclr,<> $XX[1],$YY,%r0	; conditional
	copy	$TX[0],$TX[1]		; move
	`sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
	$ST	$TY,0($iy)
	addl	$TX[0],$TY,$TY
	addl	$TX[1],$YY,$YY
	and	$mask,$TY,$TY
	and	$mask,$YY,$YY
___
        # "rotate" registers: the first element moves to the back, so the
        # next round sees the register names in swapped roles
        @TX = (@TX[1 .. $#TX], $TX[0]);
        @XX = (@XX[1 .. $#XX], $XX[0]);
    }
}
| 112 | |||
# Append a one-byte-per-iteration loop to $code.
#
#   $label - assembler label placed at the top of the loop and used as the
#            branch target of the closing addib
#   $count - name of the register holding the iteration count; the loop
#            decrements it and repeats while the result is non-zero
#
# Per iteration the emitted code loads one byte through "ldbx $inp($out)",
# XORs it with a value loaded from the key table and stores the result with
# "stb" at the old $out, having advanced $out by one.
sub foldedloop {
    my $label = shift;
    my $count = shift;

    $code .= <<___;
$label
	$MKX	$YY,$key,$iy
	$LDX	$YY($key),$TY
	$MKX	$XX[0],$key,$ix
	$ST	$TX[0],0($iy)
	ldo	1($XX[0]),$XX[0]
	$ST	$TY,0($ix)
	addl	$TX[0],$TY,$TY
	ldbx	$inp($out),$dat1
	and	$mask,$TY,$TY
	and	$mask,$XX[0],$XX[0]
	$LDX	$TY($key),$acc
	$LDX	$XX[0]($key),$TX[0]
	ldo	1($out),$out
	xor	$dat1,$acc,$acc
	addl	$TX[0],$YY,$YY
	stb	$acc,-1($out)
	addib,<> -1,$count,$label	; $count is always small
	and	$mask,$YY,$YY
___
}
| 137 | |||
# ---------------------------------------------------------------------------
# RC4 routine (exported with four general-register arguments: $key, $len,
# $inp, $out).
#
# Layout of the emitted code:
#   * prologue saving %r2..%r6, immediate exit through L$abort when $len is 0
#   * the two state values are loaded from the first two $SZ-sized slots at
#     $key, and $key is advanced past them
#   * short inputs ($len compared against 6) go straight to the folded loop
#     at L$oop1
#   * otherwise $out is first brought to a 4-byte boundary (L$alignout), then
#     one of two 4x-unrolled loops runs, depending on whether $inp is
#     aligned as well (L$oop4) or not (L$oop4misalignedinp, which realigns
#     the input words with vshd)
#   * remaining bytes are handled by L$oop1; L$done stores the two state
#     values back and restores the saved registers
#
# Backtick expressions are left in the text on purpose; they are evaluated in
# a single pass once the whole module has been assembled in $code.
# ---------------------------------------------------------------------------
$code = <<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
RC4
	.PROC
	.CALLINFO	FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)

	cmpib,*=	0,$len,L\$abort
	sub	$inp,$out,$inp		; distance between $inp and $out

	$LD	`0*$SZ`($key),$XX[0]
	$LD	`1*$SZ`($key),$YY
	ldo	`2*$SZ`($key),$key

	ldi	0xff,$mask
	ldi	3,$dat0

	ldo	1($XX[0]),$XX[0]	; warm up loop
	and	$mask,$XX[0],$XX[0]
	$LDX	$XX[0]($key),$TX[0]
	addl	$TX[0],$YY,$YY
	cmpib,*>>=	6,$len,L\$oop1		; is $len large enough to bother?
	and	$mask,$YY,$YY

	and,<>	$out,$dat0,$rem		; is $out aligned?
	b	L\$alignedout
	subi	4,$rem,$rem
	sub	$len,$rem,$len
___
foldedloop("L\$alignout", $rem);	# process till $out is aligned

$code .= <<___;
L\$alignedout				; $len is at least 4 here
	and,<>	$inp,$dat0,$acc		; is $inp aligned?
	b	L\$oop4
	sub	$inp,$acc,$rem		; align $inp

	sh3addl	$acc,%r0,$acc
	subi	32,$acc,$acc
	mtctl	$acc,%cr11		; load %sar with vshd align factor
	ldwx	$rem($out),$dat0
	ldo	4($rem),$rem
L\$oop4misalignedinp
___
unrolledloopbody();
$code .= <<___;
	$LDX	$TY($key),$ix
	ldwx	$rem($out),$dat1
	ldo	-4($len),$len
	or	$ix,$acc,$acc		; last piece, no need to dep
	vshd	$dat0,$dat1,$iy		; align data
	copy	$dat1,$dat0
	xor	$iy,$acc,$acc
	stw	$acc,0($out)
	cmpib,*<<	3,$len,L\$oop4misalignedinp
	ldo	4($out),$out
	cmpib,*=	0,$len,L\$done
	nop
	b	L\$oop1
	nop

	.ALIGN	8
L\$oop4
___
unrolledloopbody();
$code .= <<___;
	$LDX	$TY($key),$ix
	ldwx	$inp($out),$dat0
	ldo	-4($len),$len
	or	$ix,$acc,$acc		; last piece, no need to dep
	xor	$dat0,$acc,$acc
	stw	$acc,0($out)
	cmpib,*<<	3,$len,L\$oop4
	ldo	4($out),$out
	cmpib,*=	0,$len,L\$done
	nop
___
foldedloop("L\$oop1", $len);
$code .= <<___;
L\$done
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2
	ldo	-1($XX[0]),$XX[0]	; chill out loop
	sub	$YY,$TX[0],$YY
	and	$mask,$XX[0],$XX[0]
	and	$mask,$YY,$YY
	$ST	$XX[0],`-2*$SZ`($key)
	$ST	$YY,`-1*$SZ`($key)
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
L\$abort
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
___
| 243 | |||
# ---------------------------------------------------------------------------
# private_RC4_set_key (three general-register arguments: $key, $len, $inp)
# and RC4_options.
#
# private_RC4_set_key:
#   * zeroes the first two $SZ-sized slots at $key and steps past them
#   * L$1st fills the following 256 slots with 0..255
#   * L$2nd makes 256 passes; each pass adds the current table entry and one
#     byte of the caller's key data to a running index (kept modulo 256 via
#     $mask) and swaps the two table entries.  %r23 runs from -$len up to 0
#     as a negative offset from the end of the key data and is reset to
#     -$len by the instruction following "addi,nuv".
#
# RC4_options returns the address of the L$opts string, computed relative to
# L$pic.
# ---------------------------------------------------------------------------
$code .= <<___;

	.EXPORT	private_RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN	8
private_RC4_set_key
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	$ST	%r0,`0*$SZ`($key)
	$ST	%r0,`1*$SZ`($key)
	ldo	`2*$SZ`($key),$key
	copy	%r0,$XX[0]
L\$1st
	$ST	$XX[0],0($key)
	ldo	1($XX[0]),$XX[0]
	bb,>=	$XX[0],`31-8`,L\$1st	; $XX[0]<256
	ldo	$SZ($key),$key

	ldo	`-256*$SZ`($key),$key	; rewind $key
	addl	$len,$inp,$inp		; $inp to point at the end
	sub	%r0,$len,%r23		; inverse index
	copy	%r0,$XX[0]
	copy	%r0,$XX[1]
	ldi	0xff,$mask

L\$2nd
	$LDX	$XX[0]($key),$TX[0]
	ldbx	%r23($inp),$TX[1]
	addi,nuv	1,%r23,%r23	; increment and conditional
	sub	%r0,$len,%r23		; inverse index
	addl	$TX[0],$XX[1],$XX[1]
	addl	$TX[1],$XX[1],$XX[1]
	and	$mask,$XX[1],$XX[1]
	$MKX	$XX[0],$key,$TY
	$LDX	$XX[1]($key),$TX[1]
	$MKX	$XX[1],$key,$YY
	ldo	1($XX[0]),$XX[0]
	$ST	$TX[0],0($YY)
	bb,>=	$XX[0],`31-8`,L\$2nd	; $XX[0]<256
	$ST	$TX[1],0($TY)

	bv,n	(%r2)
	.EXIT
	nop
	.PROCEND

	.EXPORT	RC4_options,ENTRY
	.ALIGN	8
RC4_options
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	blr	%r0,%r28
	ldi	3,%r1
L\$pic
	andcm	%r28,%r1,%r28
	bv	(%r2)
	.EXIT
	ldo	L\$opts-L\$pic(%r28),%r28
	.PROCEND
	.ALIGN	8
L\$opts
	.STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
	.STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
# Replace every `...` snippet in the generated text with the result of
# evaluating it as Perl (frame offsets, conditional instructions, etc.).
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# For the 4-byte SIZE_T flavour, rewrite each "cmpib,*<cond>" as
# "comib,<cond>".
# NOTE(review): presumably because the ",*" forms are not available at
# .LEVEL 1.0 -- confirm against the assembler documentation.
$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);

print $code;

# Buffered write errors are only reported when the handle is closed; check
# the result so that a truncated output file cannot pass for success.
close STDOUT or die "error closing STDOUT: $!";
