summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/rc4/asm/rc4-parisc.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/rc4/asm/rc4-parisc.pl')
-rw-r--r-- src/lib/libcrypto/rc4/asm/rc4-parisc.pl 320
1 files changed, 0 insertions, 320 deletions
diff --git a/src/lib/libcrypto/rc4/asm/rc4-parisc.pl b/src/lib/libcrypto/rc4/asm/rc4-parisc.pl
deleted file mode 100644
index 7e7974430a..0000000000
--- a/src/lib/libcrypto/rc4/asm/rc4-parisc.pl
+++ /dev/null
@@ -1,320 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# RC4 for PA-RISC.
11
12# June 2009.
13#
14# Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
15# For reference, [4x] unrolled loop is >40% faster than folded one.
16# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement
17# is believed to be not sufficient to justify the effort...
18#
19# Special thanks to polarhome.com for providing HP-UX account.
20
# Directory part of the script path (including the trailing separator). It is
# used further down to locate opensslconf.h relative to this script. Empty
# when the script was invoked without any directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Command line: <flavour> <output file>.
$flavour = shift;
$output  = shift;

# Send the generated assembly to the requested file. Failing to open it must
# be fatal: otherwise the text would be printed to the inherited stdout, the
# script would still exit successfully, and the build would carry on with a
# missing or stale output file. Without an output argument the inherited
# stdout is used as is.
if (defined $output && $output ne "") {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# ABI-dependent parameters: any flavour containing "64" selects the 64-bit
# settings (.LEVEL 2.0W, 8-byte slots, std/ldd); everything else gets the
# 32-bit ones (.LEVEL 1.0, 4-byte slots, stw/ldw).
if (defined $flavour && $flavour =~ /64/) {
    $LEVEL        = "2.0W";
    $SIZE_T       = 8;
    $FRAME_MARKER = 80;
    $SAVED_RP     = 16;
    $PUSH         = "std";
    $PUSHMA       = "std,ma";
    $POP          = "ldd";
    $POPMB        = "ldd,mb";
} else {
    $LEVEL        = "1.0";
    $SIZE_T       = 4;
    $FRAME_MARKER = 48;
    $SAVED_RP     = 20;
    $PUSH         = "stw";
    $PUSHMA       = "stwm";
    $POP          = "ldw";
    $POPMB        = "ldwm";
}
46
# Classify one line of opensslconf.h.
# Returns 1 when the line defines RC4_INT as a char type, 4 when it defines
# RC4_INT as anything else, and nothing (undef in scalar context) when the
# line is not a "#define RC4_INT ..." line. Trailing whitespace, including
# the carriage return of a CRLF line ending, is ignored so that such a header
# is not mistaken for the int variant.
sub _rc4_int_size {
    my ($line) = @_;
    return unless $line =~ m/#\s*define\s+RC4_INT\s+(.*)/;
    return ($1 =~ /char\s*$/) ? 1 : 4;
}

$FRAME=4*$SIZE_T+$FRAME_MARKER;	# 4 saved regs + frame marker
				#                 [+ argument transfer]
$SZ=1;				# defaults to RC4_CHAR

# Pick up the element size from opensslconf.h when the header can be read.
# A missing header is not an error: the RC4_CHAR default above stays in force.
if (open my $conf, '<', "${dir}../../opensslconf.h") {
    while (my $line = <$conf>) {
        my $size = _rc4_int_size($line);
        next unless defined $size;
        $SZ = $size;		# only the first RC4_INT definition counts
        last;
    }
    close $conf;
}

# Load/store mnemonics matching the element size of the key schedule.
if ($SZ==1) {	# RC4_CHAR
    $LD  = "ldb";
    $LDX = "ldbx";
    $MKX = "addl";
    $ST  = "stb";
} else {	# RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
    $LD  = "ldw";
    $LDX = "ldwx,s";
    $MKX = "sh2addl";
    $ST  = "stw";
}
71
# Register allocation used by the assembly templates.

# Operands of the generated routines: key schedule, byte count, input and
# output pointers.
($key, $len, $inp, $out) = ("%r26", "%r25", "%r24", "%r23");

# x index (current/next) and the key-schedule values loaded through it; the
# two-element arrays are rotated by unrolledloopbody() after every round.
@XX = ("%r19", "%r20");
@TX = ("%r21", "%r22");

# y index and the key-schedule value loaded through it.
($YY, $TY) = ("%r28", "%r29");

# %r1..%r6: keystream accumulator, two element addresses, two data words and
# the alignment remainder. %r2..%r6 are saved by the RC4 prologue.
($acc, $ix, $iy, $dat0, $dat1, $rem) = map { "%r$_" } 1 .. 6;

# Holds the constant 0xff used to wrap indices.
$mask = "%r31";
89
# unrolledloopbody(): append four consecutive RC4 rounds to $code.
#
# Takes no arguments and works entirely on the global register names. Each
# round computes the next x index into $XX[1], swaps key[x] and key[y]
# through the addresses built in $ix/$iy, leaves the keystream index
# (key[x]+key[y], masked) in $TY and the next y in $YY.
#
# The two back-quoted lines are templates expanded by the s///e pass at the
# end of the script; $i is interpolated right here, so in round 0 they expand
# to nothing. In rounds 1..3 the first one loads the keystream byte selected
# by the previous round's $TY into $dat1, and the second one deposits that
# byte into $acc (zdep for the first byte, dep afterwards). The byte produced
# by the fourth round is left for the code emitted after this body.
#
# @XX and @TX are rotated after every round so "next" becomes "current";
# after four rounds both two-element arrays are back in their initial order.
90sub unrolledloopbody {
91for ($i=0;$i<4;$i++) {
# One round. The comclr/copy pair is a conditional move: it replaces the
# preloaded $TX[1] with $TX[0] when the next x equals y, i.e. when the store
# to 0($ix) has just overwritten the slot $TX[1] was loaded from.
92$code.=<<___;
93 ldo 1($XX[0]),$XX[1]
94 `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
95 and $mask,$XX[1],$XX[1]
96 $LDX $YY($key),$TY
97 $MKX $YY,$key,$ix
98 $LDX $XX[1]($key),$TX[1]
99 $MKX $XX[0],$key,$iy
100 $ST $TX[0],0($ix)
101 comclr,<> $XX[1],$YY,%r0 ; conditional
102 copy $TX[0],$TX[1] ; move
103 `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
104 $ST $TY,0($iy)
105 addl $TX[0],$TY,$TY
106 addl $TX[1],$YY,$YY
107 and $mask,$TY,$TY
108 and $mask,$YY,$YY
109___
110push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
111} }
112
# foldedloop($label, $count): append a one-byte-per-iteration RC4 loop to
# $code.
#
#   $label - assembler label placed at the top of the loop; also the branch
#            target of the closing addib.
#   $count - name of the register holding the iteration count; it is
#            decremented once per iteration and the loop repeats until it
#            reaches zero.
#
# Every iteration swaps key[x] and key[y], increments and masks x, loads one
# input byte through $inp($out) ($inp holds the input-minus-output distance
# computed in the RC4 prologue), XORs it with the keystream byte key[$TY] and
# stores the result at -1($out) after $out has been advanced. The next key[x]
# is loaded into $TX[0] and added to $YY ready for the following iteration;
# the final "and" that masks $YY is placed after the branch instruction.
113sub foldedloop {
114my ($label,$count)=@_;
115$code.=<<___;
116$label
117 $MKX $YY,$key,$iy
118 $LDX $YY($key),$TY
119 $MKX $XX[0],$key,$ix
120 $ST $TX[0],0($iy)
121 ldo 1($XX[0]),$XX[0]
122 $ST $TY,0($ix)
123 addl $TX[0],$TY,$TY
124 ldbx $inp($out),$dat1
125 and $mask,$TY,$TY
126 and $mask,$XX[0],$XX[0]
127 $LDX $TY($key),$acc
128 $LDX $XX[0]($key),$TX[0]
129 ldo 1($out),$out
130 xor $dat1,$acc,$acc
131 addl $TX[0],$YY,$YY
132 stb $acc,-1($out)
133 addib,<> -1,$count,$label ; $count is always small
134 and $mask,$YY,$YY
135___
136}
137
# Start of the generated assembly: the RC4 entry point.
#
# The text below saves %r2 and %r3..%r6 (used as $ix, $iy, $dat0, $dat1 and
# $rem), leaves through L$abort when $len is zero, and turns $inp into the
# distance between input and output so that $out alone walks both buffers.
# It then loads x and y from the first two slots of the key schedule, moves
# $key past them, and performs the "warm up" step (x+1, key[x], y+key[x])
# that the loop bodies expect on entry. Short inputs (the compare against 6)
# go straight to the byte loop L$oop1; otherwise the number of bytes needed
# to word-align $out is computed into $rem and taken off $len.
#
# Back-quoted spans are Perl expressions that are expanded by the s///e pass
# at the end of the script.
138$code=<<___;
139 .LEVEL $LEVEL
140#if 0
141 .SPACE \$TEXT\$
142 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
143#else
144 .text
145#endif
146
147 .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
148RC4
149 .PROC
150 .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
151 .ENTRY
152 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
153 $PUSHMA %r3,$FRAME(%sp)
154 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
155 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
156 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
157
158 cmpib,*= 0,$len,L\$abort
159 sub $inp,$out,$inp ; distance between $inp and $out
160
161 $LD `0*$SZ`($key),$XX[0]
162 $LD `1*$SZ`($key),$YY
163 ldo `2*$SZ`($key),$key
164
165 ldi 0xff,$mask
166 ldi 3,$dat0
167
168 ldo 1($XX[0]),$XX[0] ; warm up loop
169 and $mask,$XX[0],$XX[0]
170 $LDX $XX[0]($key),$TX[0]
171 addl $TX[0],$YY,$YY
172 cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother?
173 and $mask,$YY,$YY
174
175 and,<> $out,$dat0,$rem ; is $out aligned?
176 b L\$alignedout
177 subi 4,$rem,$rem
178 sub $len,$rem,$len
179___
# Byte loop counted by $rem.
180&foldedloop("L\$alignout",$rem); # process till $out is aligned

# $out is word-aligned from here on. When the input offset is a multiple of
# four as well, control goes to L$oop4. Otherwise the offset is rounded down
# to a word boundary, %cr11 is loaded with 32 minus eight times the
# misalignment for the vshd below, the first input word is preloaded into
# $dat0, and execution falls into L$oop4misalignedinp.
182$code.=<<___;
183L\$alignedout ; $len is at least 4 here
184 and,<> $inp,$dat0,$acc ; is $inp aligned?
185 b L\$oop4
186 sub $inp,$acc,$rem ; align $inp
187
188 sh3addl $acc,%r0,$acc
189 subi 32,$acc,$acc
190 mtctl $acc,%cr11 ; load %sar with vshd align factor
191 ldwx $rem($out),$dat0
192 ldo 4($rem),$rem
193L\$oop4misalignedinp
194___
# Four RC4 rounds; the first three keystream bytes are deposited into $acc.
195&unrolledloopbody();
# Tail of the misaligned-input loop: fetch the fourth keystream byte and OR
# it into $acc, load the next input word, combine it with the previous one
# using vshd, XOR with the keystream and store one output word. The loop
# repeats while $len is still above 3; what is left is either nothing
# (L$done) or handed to the byte loop L$oop1. The same heredoc then opens
# L$oop4, the loop used when the input needs no shifting.
196$code.=<<___;
197 $LDX $TY($key),$ix
198 ldwx $rem($out),$dat1
199 ldo -4($len),$len
200 or $ix,$acc,$acc ; last piece, no need to dep
201 vshd $dat0,$dat1,$iy ; align data
202 copy $dat1,$dat0
203 xor $iy,$acc,$acc
204 stw $acc,0($out)
205 cmpib,*<< 3,$len,L\$oop4misalignedinp
206 ldo 4($out),$out
207 cmpib,*= 0,$len,L\$done
208 nop
209 b L\$oop1
210 nop
211
212 .ALIGN 8
213L\$oop4
214___
# Four RC4 rounds for the aligned-input loop.
215&unrolledloopbody();
# Tail of the aligned-input loop: complete the keystream word, XOR it with
# the input word read through $inp($out) and store it. Repeats while $len is
# still above 3; a zero remainder goes to L$done, anything else falls into
# the byte loop emitted next.
216$code.=<<___;
217 $LDX $TY($key),$ix
218 ldwx $inp($out),$dat0
219 ldo -4($len),$len
220 or $ix,$acc,$acc ; last piece, no need to dep
221 xor $dat0,$acc,$acc
222 stw $acc,0($out)
223 cmpib,*<< 3,$len,L\$oop4
224 ldo 4($out),$out
225 cmpib,*= 0,$len,L\$done
226 nop
227___
# L$oop1: byte-at-a-time loop for short inputs and leftovers, counted by $len.
228&foldedloop("L\$oop1",$len);
# Epilogue: reload %r2, undo the warm-up step ("chill out": x-1 and
# y-key[x], both masked), write x and y back to the two slots in front of the
# state array, restore %r4..%r6 and return. L$abort is the shared exit; %r3
# is reloaded and the frame released by the instruction after the branch.
229$code.=<<___;
230L\$done
231 $POP `-$FRAME-$SAVED_RP`(%sp),%r2
232 ldo -1($XX[0]),$XX[0] ; chill out loop
233 sub $YY,$TX[0],$YY
234 and $mask,$XX[0],$XX[0]
235 and $mask,$YY,$YY
236 $ST $XX[0],`-2*$SZ`($key)
237 $ST $YY,`-1*$SZ`($key)
238 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
239 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
240 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
241L\$abort
242 bv (%r2)
243 .EXIT
244 $POPMB -$FRAME(%sp),%r3
245 .PROCEND
246___
247
# Remaining generated routines and data, emitted as one heredoc.
#
# RC4_set_key: uses $key as the key schedule pointer, $len as the key length
# and $inp as the pointer to the key bytes. It stores zero into the x and y
# slots, moves $key past them, and fills the state with 0,1,2,... in the
# L$1st loop, which runs until the bit tested by "bb" shows the counter has
# reached 256. $key is then rewound, $inp is moved to the end of the key, and
# %r23 becomes a negative "inverse index" so that %r23($inp) walks the key
# bytes; it is reset to -$len each time it wraps to zero. The L$2nd loop
# performs the 256 key-mixing swaps: the second index accumulates the state
# element plus the key byte, masked with $mask, and the two state elements
# are exchanged.
#
# RC4_options: returns in %r28 the address of L$opts, computed relative to
# L$pic from the value produced by "blr" with its low two bits cleared.
#
# L$opts holds the options string, whose char/int part follows $SZ, followed
# by an identification string.
#
# Inside the heredoc, @XX[0] and @TX[0] are one-element array slices and
# interpolate to the same text as $XX[0] and $TX[0]; \@ and \$ keep Perl from
# interpolating the e-mail address and the assembler label names.
248$code.=<<___;
249
250 .EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
251 .ALIGN 8
252RC4_set_key
253 .PROC
254 .CALLINFO NO_CALLS
255 .ENTRY
256 $ST %r0,`0*$SZ`($key)
257 $ST %r0,`1*$SZ`($key)
258 ldo `2*$SZ`($key),$key
259 copy %r0,@XX[0]
260L\$1st
261 $ST @XX[0],0($key)
262 ldo 1(@XX[0]),@XX[0]
263 bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256
264 ldo $SZ($key),$key
265
266 ldo `-256*$SZ`($key),$key ; rewind $key
267 addl $len,$inp,$inp ; $inp to point at the end
268 sub %r0,$len,%r23 ; inverse index
269 copy %r0,@XX[0]
270 copy %r0,@XX[1]
271 ldi 0xff,$mask
272
273L\$2nd
274 $LDX @XX[0]($key),@TX[0]
275 ldbx %r23($inp),@TX[1]
276 addi,nuv 1,%r23,%r23 ; increment and conditional
277 sub %r0,$len,%r23 ; inverse index
278 addl @TX[0],@XX[1],@XX[1]
279 addl @TX[1],@XX[1],@XX[1]
280 and $mask,@XX[1],@XX[1]
281 $MKX @XX[0],$key,$TY
282 $LDX @XX[1]($key),@TX[1]
283 $MKX @XX[1],$key,$YY
284 ldo 1(@XX[0]),@XX[0]
285 $ST @TX[0],0($YY)
286 bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256
287 $ST @TX[1],0($TY)
288
289 bv,n (%r2)
290 .EXIT
291 nop
292 .PROCEND
293
294 .EXPORT RC4_options,ENTRY
295 .ALIGN 8
296RC4_options
297 .PROC
298 .CALLINFO NO_CALLS
299 .ENTRY
300 blr %r0,%r28
301 ldi 3,%r1
302L\$pic
303 andcm %r28,%r1,%r28
304 bv (%r2)
305 .EXIT
306 ldo L\$opts-L\$pic(%r28),%r28
307 .PROCEND
308
309 .data
310 .ALIGN 8
311L\$opts
312 .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
313 .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
314___
# Expand the back-quoted templates in the generated text.
# Every `...` span is evaluated as a Perl expression and replaced by its
# value; an undefined value becomes the empty string. An expression that
# fails to evaluate is fatal, so a typo in a template cannot silently drop
# an instruction or an offset from the output.
# Returns the expanded text.
sub _expand_templates {
    my ($text) = @_;
    $text =~ s{\`([^\`]*)\`}{
        my $expr  = $1;
        my $value = eval $expr;
        die "rc4-parisc.pl: cannot evaluate '$expr': $@" if $@;
        defined $value ? $value : "";
    }gem;
    return $text;
}

$code = _expand_templates($code);

# The 32-bit assembler spells the compare-immediate-and-branch as comib and
# has no ",*" completer; the 64-bit flavour returns with bve instead of bv.
$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8);

# Buffered write errors (for example a full disk) may only surface at close
# time, so both calls are checked to avoid leaving a truncated file behind a
# successful exit status.
print STDOUT $code or die "rc4-parisc.pl: error writing output: $!";
close STDOUT or die "rc4-parisc.pl: error closing STDOUT: $!";