diff options
Diffstat (limited to 'src/lib/libcrypto/rc4/asm/rc4-parisc.pl')
-rw-r--r-- | src/lib/libcrypto/rc4/asm/rc4-parisc.pl | 320 |
1 files changed, 0 insertions, 320 deletions
diff --git a/src/lib/libcrypto/rc4/asm/rc4-parisc.pl b/src/lib/libcrypto/rc4/asm/rc4-parisc.pl deleted file mode 100644 index 7e7974430a..0000000000 --- a/src/lib/libcrypto/rc4/asm/rc4-parisc.pl +++ /dev/null | |||
@@ -1,320 +0,0 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | |||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | |||
10 | # RC4 for PA-RISC. | ||
11 | |||
12 | # June 2009. | ||
13 | # | ||
14 | # Performance is 33% better than gcc 3.2 generated code on PA-7100LC. | ||
15 | # For reference, [4x] unrolled loop is >40% faster than folded one. | ||
16 | # It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement | ||
17 | # is believed to be not sufficient to justify the effort... | ||
18 | # | ||
19 | # Special thanks to polarhome.com for providing HP-UX account. | ||
20 | |||
# Directory containing this script (including the trailing path separator),
# or the empty string when the script was invoked without a path component.
# It is used further down to locate opensslconf.h relative to the script.
# The capture is only consulted when the match actually succeeded.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Command line: <flavour> <output file>.
$flavour = shift;
$output  = shift;

# Redirect STDOUT to the requested output file.  The three-argument form keeps
# characters in the file name from being interpreted as an open mode, and a
# failure is reported instead of being silently ignored.  When no output file
# is given the generated assembly goes to the inherited STDOUT.
if (defined($output) && length($output)) {
    open(STDOUT, '>', $output) or die "can't open $output for writing: $!";
}
26 | |||
# ABI-dependent parameters.  A flavour string containing "64" selects the
# wide settings; anything else selects the 32-bit ones.
my $wide = ($flavour =~ /64/) ? 1 : 0;

$LEVEL        = $wide ? "2.0W"   : "1.0";   # operand of the .LEVEL directive
$SIZE_T       = $wide ? 8        : 4;       # size of one saved-register slot
$FRAME_MARKER = $wide ? 80       : 48;      # added to the saved regs in $FRAME
$SAVED_RP     = $wide ? 16       : 20;      # offset used when saving %r2
$PUSH         = $wide ? "std"    : "stw";   # store used for register saves
$PUSHMA       = $wide ? "std,ma" : "stwm";  # store used with $FRAME(%sp)
$POP          = $wide ? "ldd"    : "ldw";   # load used for register restores
$POPMB        = $wide ? "ldd,mb" : "ldwm";  # load used with -$FRAME(%sp)
46 | |||
# Stack frame size: 4 saved regs + frame marker [+ argument transfer].
$FRAME = 4*$SIZE_T + $FRAME_MARKER;

# Width in bytes of one RC4 state element: 1 for RC4_CHAR, 4 for RC4_INT.
# Defaults to RC4_CHAR; the first "#define RC4_INT ..." line found in
# opensslconf.h (looked up relative to this script) overrides it.  A missing
# or unreadable header simply keeps the default.
$SZ = 1;
if (open(my $conf, '<', "${dir}../../opensslconf.h")) {
    while (my $line = <$conf>) {
        next unless $line =~ m/#\s*define\s+RC4_INT\s+(.*)/;
        # Allow trailing whitespace after the type name (for example the CR of
        # a CRLF line ending) so a char definition is not mistaken for int.
        $SZ = ($1 =~ /char\s*$/) ? 1 : 4;
        last;
    }
    close($conf);
}
59 | |||
# Load, indexed-load, index-forming and store mnemonics matching the width of
# one RC4 state element.  RC4_INT is ~5% faster than RC4_CHAR on PA-7100LC.
($LD, $LDX, $MKX, $ST) = ($SZ == 1)
    ? ("ldb", "ldbx",   "addl",    "stb")      # RC4_CHAR
    : ("ldw", "ldwx,s", "sh2addl", "stw");     # RC4_INT
71 | |||
# Register names substituted into the assembly templates below.

# Registers the RC4 routine reads its key, len, inp and out operands from.
($key, $len, $inp, $out) = map { '%r' . $_ } 26, 25, 24, 23;

# Two-element register pairs that unrolledloopbody rotates between copies.
@XX = map { '%r' . $_ } 19, 20;
@TX = map { '%r' . $_ } 21, 22;

($YY, $TY) = map { '%r' . $_ } 28, 29;

# Scratch registers: %r1 through %r6, in this order.
($acc, $ix, $iy, $dat0, $dat1, $rem) = map { '%r' . $_ } 1 .. 6;

$mask = '%r31';
89 | |||
# unrolledloopbody()
#
# Appends four copies of the RC4 round template to the global $code, using the
# global loop counter $i (0..3).  After each copy @TX and @XX are rotated, so
# consecutive copies swap the roles of the two registers in each pair.
#
# The heredoc interpolates $i immediately, so each copy carries its own value
# of $i inside the backtick expressions.  Those expressions are only evaluated
# by the substitution pass at the end of the file: for $i>0 they emit an extra
# $LDX load into $dat1 and a "dep" ("zdep" when $i==1) of $dat1 into $acc at
# bit position 8*($i-1)+7; for $i==0 they produce no instruction.
#
# Takes no arguments and returns nothing useful.
90 | sub unrolledloopbody { | ||
91 | for ($i=0;$i<4;$i++) { | ||
92 | $code.=<<___; | ||
93 | ldo 1($XX[0]),$XX[1] | ||
94 | `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)` | ||
95 | and $mask,$XX[1],$XX[1] | ||
96 | $LDX $YY($key),$TY | ||
97 | $MKX $YY,$key,$ix | ||
98 | $LDX $XX[1]($key),$TX[1] | ||
99 | $MKX $XX[0],$key,$iy | ||
100 | $ST $TX[0],0($ix) | ||
101 | comclr,<> $XX[1],$YY,%r0 ; conditional | ||
102 | copy $TX[0],$TX[1] ; move | ||
103 | `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)` | ||
104 | $ST $TY,0($iy) | ||
105 | addl $TX[0],$TY,$TY | ||
106 | addl $TX[1],$YY,$YY | ||
107 | and $mask,$TY,$TY | ||
108 | and $mask,$YY,$YY | ||
109 | ___ | ||
110 | push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers | ||
111 | } } | ||
112 | |||
# foldedloop($label, $count)
#
# Appends to the global $code a non-unrolled loop that handles one byte per
# iteration: it loads a byte via "ldbx $inp($out)", xors it with the value
# loaded into $acc and stores the result with "stb $acc,-1($out)" after $out
# has been advanced by one.
#
#   $label - text emitted as the loop's label and used as the branch target
#   $count - register name used as the counter in the closing
#            "addib,<> -1,$count,$label" (presumably decrement and branch
#            while non-zero; confirm against the PA-RISC manual)
#
# Returns nothing useful.
113 | sub foldedloop { | ||
114 | my ($label,$count)=@_; | ||
115 | $code.=<<___; | ||
116 | $label | ||
117 | $MKX $YY,$key,$iy | ||
118 | $LDX $YY($key),$TY | ||
119 | $MKX $XX[0],$key,$ix | ||
120 | $ST $TX[0],0($iy) | ||
121 | ldo 1($XX[0]),$XX[0] | ||
122 | $ST $TY,0($ix) | ||
123 | addl $TX[0],$TY,$TY | ||
124 | ldbx $inp($out),$dat1 | ||
125 | and $mask,$TY,$TY | ||
126 | and $mask,$XX[0],$XX[0] | ||
127 | $LDX $TY($key),$acc | ||
128 | $LDX $XX[0]($key),$TX[0] | ||
129 | ldo 1($out),$out | ||
130 | xor $dat1,$acc,$acc | ||
131 | addl $TX[0],$YY,$YY | ||
132 | stb $acc,-1($out) | ||
133 | addib,<> -1,$count,$label ; $count is always small | ||
134 | and $mask,$YY,$YY | ||
135 | ___ | ||
136 | } | ||
137 | |||
# Start the generated assembly in $code with the RC4 routine.  The template
# is assembled from literal heredoc pieces interleaved with calls to
# foldedloop() and unrolledloopbody(), in this order: prologue and warm-up,
# a byte loop that runs until $out is aligned, a 4x unrolled loop for
# misaligned $inp, a 4x unrolled loop for aligned $inp, a byte loop labelled
# L$oop1, and the epilogue.  Backtick expressions inside the text are
# evaluated by the substitution pass at the end of the file.
138 | $code=<<___; | ||
139 | .LEVEL $LEVEL | ||
140 | #if 0 | ||
141 | .SPACE \$TEXT\$ | ||
142 | .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY | ||
143 | #else | ||
144 | .text | ||
145 | #endif | ||
146 | |||
147 | .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR | ||
148 | RC4 | ||
149 | .PROC | ||
150 | .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6 | ||
151 | .ENTRY | ||
152 | $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue | ||
153 | $PUSHMA %r3,$FRAME(%sp) | ||
154 | $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) | ||
155 | $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) | ||
156 | $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) | ||
157 | |||
158 | cmpib,*= 0,$len,L\$abort | ||
159 | sub $inp,$out,$inp ; distance between $inp and $out | ||
160 | |||
161 | $LD `0*$SZ`($key),$XX[0] | ||
162 | $LD `1*$SZ`($key),$YY | ||
163 | ldo `2*$SZ`($key),$key | ||
164 | |||
165 | ldi 0xff,$mask | ||
166 | ldi 3,$dat0 | ||
167 | |||
168 | ldo 1($XX[0]),$XX[0] ; warm up loop | ||
169 | and $mask,$XX[0],$XX[0] | ||
170 | $LDX $XX[0]($key),$TX[0] | ||
171 | addl $TX[0],$YY,$YY | ||
172 | cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother? | ||
173 | and $mask,$YY,$YY | ||
174 | |||
175 | and,<> $out,$dat0,$rem ; is $out aligned? | ||
176 | b L\$alignedout | ||
177 | subi 4,$rem,$rem | ||
178 | sub $len,$rem,$len | ||
179 | ___ | ||
180 | &foldedloop("L\$alignout",$rem); # process till $out is aligned | ||
181 | |||
182 | $code.=<<___; | ||
183 | L\$alignedout ; $len is at least 4 here | ||
184 | and,<> $inp,$dat0,$acc ; is $inp aligned? | ||
185 | b L\$oop4 | ||
186 | sub $inp,$acc,$rem ; align $inp | ||
187 | |||
188 | sh3addl $acc,%r0,$acc | ||
189 | subi 32,$acc,$acc | ||
190 | mtctl $acc,%cr11 ; load %sar with vshd align factor | ||
191 | ldwx $rem($out),$dat0 | ||
192 | ldo 4($rem),$rem | ||
193 | L\$oop4misalignedinp | ||
194 | ___ | ||
# Body of the 4x unrolled loop used when $inp is not word-aligned.
195 | &unrolledloopbody(); | ||
196 | $code.=<<___; | ||
197 | $LDX $TY($key),$ix | ||
198 | ldwx $rem($out),$dat1 | ||
199 | ldo -4($len),$len | ||
200 | or $ix,$acc,$acc ; last piece, no need to dep | ||
201 | vshd $dat0,$dat1,$iy ; align data | ||
202 | copy $dat1,$dat0 | ||
203 | xor $iy,$acc,$acc | ||
204 | stw $acc,0($out) | ||
205 | cmpib,*<< 3,$len,L\$oop4misalignedinp | ||
206 | ldo 4($out),$out | ||
207 | cmpib,*= 0,$len,L\$done | ||
208 | nop | ||
209 | b L\$oop1 | ||
210 | nop | ||
211 | |||
212 | .ALIGN 8 | ||
213 | L\$oop4 | ||
214 | ___ | ||
# Body of the 4x unrolled loop used when $inp is word-aligned.
215 | &unrolledloopbody(); | ||
216 | $code.=<<___; | ||
217 | $LDX $TY($key),$ix | ||
218 | ldwx $inp($out),$dat0 | ||
219 | ldo -4($len),$len | ||
220 | or $ix,$acc,$acc ; last piece, no need to dep | ||
221 | xor $dat0,$acc,$acc | ||
222 | stw $acc,0($out) | ||
223 | cmpib,*<< 3,$len,L\$oop4 | ||
224 | ldo 4($out),$out | ||
225 | cmpib,*= 0,$len,L\$done | ||
226 | nop | ||
227 | ___ | ||
# Byte-at-a-time loop; also the branch target for short inputs and for the
# bytes left over after the misaligned unrolled loop.
228 | &foldedloop("L\$oop1",$len); | ||
229 | $code.=<<___; | ||
230 | L\$done | ||
231 | $POP `-$FRAME-$SAVED_RP`(%sp),%r2 | ||
232 | ldo -1($XX[0]),$XX[0] ; chill out loop | ||
233 | sub $YY,$TX[0],$YY | ||
234 | and $mask,$XX[0],$XX[0] | ||
235 | and $mask,$YY,$YY | ||
236 | $ST $XX[0],`-2*$SZ`($key) | ||
237 | $ST $YY,`-1*$SZ`($key) | ||
238 | $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 | ||
239 | $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 | ||
240 | $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 | ||
241 | L\$abort | ||
242 | bv (%r2) | ||
243 | .EXIT | ||
244 | $POPMB -$FRAME(%sp),%r3 | ||
245 | .PROCEND | ||
246 | ___ | ||
247 | |||
# Append the remaining routines and data to $code:
#   RC4_set_key - zeroes the two leading state slots, fills 256 consecutive
#                 slots with an incrementing counter in the L$1st loop, then
#                 swaps entries in the L$2nd loop using bytes loaded from the
#                 key buffer.
#   RC4_options - returns in %r28 the address of L$opts, computed relative
#                 to L$pic.
#   L$opts      - "rc4(4x,char)" or "rc4(4x,int)" depending on $SZ, followed
#                 by the CRYPTOGAMS identification string.
# Note that @XX[0]/@TX[0] interpolate as one-element array slices here.
248 | $code.=<<___; | ||
249 | |||
250 | .EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR | ||
251 | .ALIGN 8 | ||
252 | RC4_set_key | ||
253 | .PROC | ||
254 | .CALLINFO NO_CALLS | ||
255 | .ENTRY | ||
256 | $ST %r0,`0*$SZ`($key) | ||
257 | $ST %r0,`1*$SZ`($key) | ||
258 | ldo `2*$SZ`($key),$key | ||
259 | copy %r0,@XX[0] | ||
260 | L\$1st | ||
261 | $ST @XX[0],0($key) | ||
262 | ldo 1(@XX[0]),@XX[0] | ||
263 | bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256 | ||
264 | ldo $SZ($key),$key | ||
265 | |||
266 | ldo `-256*$SZ`($key),$key ; rewind $key | ||
267 | addl $len,$inp,$inp ; $inp to point at the end | ||
268 | sub %r0,$len,%r23 ; inverse index | ||
269 | copy %r0,@XX[0] | ||
270 | copy %r0,@XX[1] | ||
271 | ldi 0xff,$mask | ||
272 | |||
273 | L\$2nd | ||
274 | $LDX @XX[0]($key),@TX[0] | ||
275 | ldbx %r23($inp),@TX[1] | ||
276 | addi,nuv 1,%r23,%r23 ; increment and conditional | ||
277 | sub %r0,$len,%r23 ; inverse index | ||
278 | addl @TX[0],@XX[1],@XX[1] | ||
279 | addl @TX[1],@XX[1],@XX[1] | ||
280 | and $mask,@XX[1],@XX[1] | ||
281 | $MKX @XX[0],$key,$TY | ||
282 | $LDX @XX[1]($key),@TX[1] | ||
283 | $MKX @XX[1],$key,$YY | ||
284 | ldo 1(@XX[0]),@XX[0] | ||
285 | $ST @TX[0],0($YY) | ||
286 | bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256 | ||
287 | $ST @TX[1],0($TY) | ||
288 | |||
289 | bv,n (%r2) | ||
290 | .EXIT | ||
291 | nop | ||
292 | .PROCEND | ||
293 | |||
294 | .EXPORT RC4_options,ENTRY | ||
295 | .ALIGN 8 | ||
296 | RC4_options | ||
297 | .PROC | ||
298 | .CALLINFO NO_CALLS | ||
299 | .ENTRY | ||
300 | blr %r0,%r28 | ||
301 | ldi 3,%r1 | ||
302 | L\$pic | ||
303 | andcm %r28,%r1,%r28 | ||
304 | bv (%r2) | ||
305 | .EXIT | ||
306 | ldo L\$opts-L\$pic(%r28),%r28 | ||
307 | .PROCEND | ||
308 | |||
309 | .data | ||
310 | .ALIGN 8 | ||
311 | L\$opts | ||
312 | .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)" | ||
313 | .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>" | ||
314 | ___ | ||
# Replace every `...` expression embedded in the template with the result of
# evaluating it as Perl (offsets, sizes and the conditional instructions).
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# Flavour-specific mnemonic rewrites: 32-bit builds turn "cmpib,*" into
# "comib,", 64-bit builds turn "bv" into "bve".
$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
$code =~ s/\bbv\b/bve/gm      if ($SIZE_T==8);

# Emit the result.  Buffered write errors (full disk, closed pipe) may only
# surface when the handle is closed, so check both calls; otherwise a
# truncated assembly file would be produced with a successful exit status.
print STDOUT $code or die "can't write to STDOUT: $!";
close(STDOUT)      or die "error closing STDOUT: $!";