summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/sha/asm/sha512-parisc.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/sha/asm/sha512-parisc.pl')
-rwxr-xr-xsrc/lib/libcrypto/sha/asm/sha512-parisc.pl801
1 files changed, 0 insertions, 801 deletions
diff --git a/src/lib/libcrypto/sha/asm/sha512-parisc.pl b/src/lib/libcrypto/sha/asm/sha512-parisc.pl
deleted file mode 100755
index 42832e29f1..0000000000
--- a/src/lib/libcrypto/sha/asm/sha512-parisc.pl
+++ /dev/null
@@ -1,801 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA256/512 block procedure for PA-RISC.
11
12# June 2009.
13#
14# SHA256 performance is >75% better than gcc 3.2 generated code on
15# PA-7100LC. Compared to code generated by vendor compiler this
16# implementation is almost 70% faster in 64-bit build, but delivers
17# virtually same performance in 32-bit build on PA-8600.
18#
19# SHA512 performance is >2.9x better than gcc 3.2 generated code on
20# PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the
21# code is executed on PA-RISC 2.0 processor and switches to 64-bit
22# code path delivering adequate performance even in "blended" 32-bit
23# build. Though 64-bit code is not any faster than code generated by
24# vendor compiler on PA-8600...
25#
26# Special thanks to polarhome.com for providing HP-UX account.
27
28$flavour = shift;
29$output = shift;
30open STDOUT,">$output";
31
32if ($flavour =~ /64/) {
33 $LEVEL ="2.0W";
34 $SIZE_T =8;
35 $FRAME_MARKER =80;
36 $SAVED_RP =16;
37 $PUSH ="std";
38 $PUSHMA ="std,ma";
39 $POP ="ldd";
40 $POPMB ="ldd,mb";
41} else {
42 $LEVEL ="1.0";
43 $SIZE_T =4;
44 $FRAME_MARKER =48;
45 $SAVED_RP =20;
46 $PUSH ="stw";
47 $PUSHMA ="stwm";
48 $POP ="ldw";
49 $POPMB ="ldwm";
50}
51
52if ($output =~ /512/) {
53 $func="sha512_block_data_order";
54 $SZ=8;
55 @Sigma0=(28,34,39);
56 @Sigma1=(14,18,41);
57 @sigma0=(1, 8, 7);
58 @sigma1=(19,61, 6);
59 $rounds=80;
60 $LAST10BITS=0x017;
61 $LD="ldd";
62 $LDM="ldd,ma";
63 $ST="std";
64} else {
65 $func="sha256_block_data_order";
66 $SZ=4;
67 @Sigma0=( 2,13,22);
68 @Sigma1=( 6,11,25);
69 @sigma0=( 7,18, 3);
70 @sigma1=(17,19,10);
71 $rounds=64;
72 $LAST10BITS=0x0f2;
73 $LD="ldw";
74 $LDM="ldwm";
75 $ST="stw";
76}
77
78$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
79 # [+ argument transfer]
80$XOFF=16*$SZ+32; # local variables
81$FRAME+=$XOFF;
82$XOFF+=$FRAME_MARKER; # distance between %sp and local variables
83
84$ctx="%r26"; # zapped by $a0
85$inp="%r25"; # zapped by $a1
86$num="%r24"; # zapped by $t0
87
88$a0 ="%r26";
89$a1 ="%r25";
90$t0 ="%r24";
91$t1 ="%r29";
92$Tbl="%r31";
93
94@V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");
95
96@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
97 "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp);
98
99sub ROUND_00_15 {
100my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
101$code.=<<___;
102 _ror $e,$Sigma1[0],$a0
103 and $f,$e,$t0
104 _ror $e,$Sigma1[1],$a1
105 addl $t1,$h,$h
106 andcm $g,$e,$t1
107 xor $a1,$a0,$a0
108 _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
109 or $t0,$t1,$t1 ; Ch(e,f,g)
110 addl @X[$i%16],$h,$h
111 xor $a0,$a1,$a1 ; Sigma1(e)
112 addl $t1,$h,$h
113 _ror $a,$Sigma0[0],$a0
114 addl $a1,$h,$h
115
116 _ror $a,$Sigma0[1],$a1
117 and $a,$b,$t0
118 and $a,$c,$t1
119 xor $a1,$a0,$a0
120 _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
121 xor $t1,$t0,$t0
122 and $b,$c,$t1
123 xor $a0,$a1,$a1 ; Sigma0(a)
124 addl $h,$d,$d
125 xor $t1,$t0,$t0 ; Maj(a,b,c)
126 `"$LDM $SZ($Tbl),$t1" if ($i<15)`
127 addl $a1,$h,$h
128 addl $t0,$h,$h
129
130___
131}
132
133sub ROUND_16_xx {
134my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
135$i-=16;
136$code.=<<___;
137 _ror @X[($i+1)%16],$sigma0[0],$a0
138 _ror @X[($i+1)%16],$sigma0[1],$a1
139 addl @X[($i+9)%16],@X[$i],@X[$i]
140 _ror @X[($i+14)%16],$sigma1[0],$t0
141 _ror @X[($i+14)%16],$sigma1[1],$t1
142 xor $a1,$a0,$a0
143 _shr @X[($i+1)%16],$sigma0[2],$a1
144 xor $t1,$t0,$t0
145 _shr @X[($i+14)%16],$sigma1[2],$t1
146 xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
147 xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
148 $LDM $SZ($Tbl),$t1
149 addl $a0,@X[$i],@X[$i]
150 addl $t0,@X[$i],@X[$i]
151___
152$code.=<<___ if ($i==15);
153 extru $t1,31,10,$a1
154 comiclr,<> $LAST10BITS,$a1,%r0
155 ldo 1($Tbl),$Tbl ; signal end of $Tbl
156___
157&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
158}
159
160$code=<<___;
161 .LEVEL $LEVEL
162 .text
163
164 .section .rodata
165 .ALIGN 64
166L\$table
167___
168$code.=<<___ if ($SZ==8);
169 .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
170 .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
171 .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
172 .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
173 .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
174 .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
175 .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
176 .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
177 .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
178 .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
179 .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
180 .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
181 .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
182 .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
183 .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
184 .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
185 .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
186 .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
187 .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
188 .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
189 .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
190 .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
191 .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
192 .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
193 .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
194 .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
195 .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
196 .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
197 .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
198 .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
199 .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
200 .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
201 .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
202 .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
203 .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
204 .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
205 .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
206 .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
207 .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
208 .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
209___
210$code.=<<___ if ($SZ==4);
211 .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
212 .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
213 .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
214 .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
215 .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
216 .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
217 .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
218 .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
219 .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
220 .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
221 .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
222 .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
223 .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
224 .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
225 .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
226 .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
227___
228$code.=<<___;
229 .previous
230
231 .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
232 .ALIGN 64
233$func
234 .PROC
235 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
236 .ENTRY
237 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
238 $PUSHMA %r3,$FRAME(%sp)
239 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
240 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
241 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
242 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
243 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
244 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
245 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
246 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
247 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
248 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
249 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
250 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
251 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
252 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
253 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
254
255 _shl $num,`log(16*$SZ)/log(2)`,$num
256 addl $inp,$num,$num ; $num to point at the end of $inp
257
258 $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
259 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
260 $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
261
262#ifdef __PIC__
263 addil LT'L\$table, %r19
264 ldw RT'L\$table(%r1), $Tbl
265#else
266 ldil L'L\$table, %t1
267 ldo R'L\$table(%t1), $Tbl
268#endif
269___
270$code.=<<___ if ($SZ==8 && $SIZE_T==4);
271#ifndef __OpenBSD__
272___
273$code.=<<___ if ($SZ==8 && $SIZE_T==4);
274 ldi 31,$t1
275 mtctl $t1,%cr11
276 extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
277 b L\$parisc1
278 nop
279___
280$code.=<<___;
281 $LD `0*$SZ`($ctx),$A ; load context
282 $LD `1*$SZ`($ctx),$B
283 $LD `2*$SZ`($ctx),$C
284 $LD `3*$SZ`($ctx),$D
285 $LD `4*$SZ`($ctx),$E
286 $LD `5*$SZ`($ctx),$F
287 $LD `6*$SZ`($ctx),$G
288 $LD `7*$SZ`($ctx),$H
289
290 extru $inp,31,`log($SZ)/log(2)`,$t0
291 sh3addl $t0,%r0,$t0
292 subi `8*$SZ`,$t0,$t0
293 mtctl $t0,%cr11 ; load %sar with align factor
294
295L\$oop
296 ldi `$SZ-1`,$t0
297 $LDM $SZ($Tbl),$t1
298 andcm $inp,$t0,$t0 ; align $inp
299___
300 for ($i=0;$i<15;$i++) { # load input block
301 $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
302$code.=<<___;
303 cmpb,*= $inp,$t0,L\$aligned
304 $LD `$SZ*15`($t0),@X[15]
305 $LD `$SZ*16`($t0),@X[16]
306___
307 for ($i=0;$i<16;$i++) { # align data
308 $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
309$code.=<<___;
310L\$aligned
311 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
312___
313
314for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
315$code.=<<___;
316L\$rounds
317 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
318___
319for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
320$code.=<<___;
321 bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
322 nop
323
324 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
325 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
326 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
327 ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
328
329 $LD `0*$SZ`($ctx),@X[0] ; load context
330 $LD `1*$SZ`($ctx),@X[1]
331 $LD `2*$SZ`($ctx),@X[2]
332 $LD `3*$SZ`($ctx),@X[3]
333 $LD `4*$SZ`($ctx),@X[4]
334 $LD `5*$SZ`($ctx),@X[5]
335 addl @X[0],$A,$A
336 $LD `6*$SZ`($ctx),@X[6]
337 addl @X[1],$B,$B
338 $LD `7*$SZ`($ctx),@X[7]
339 ldo `16*$SZ`($inp),$inp ; advance $inp
340
341 $ST $A,`0*$SZ`($ctx) ; save context
342 addl @X[2],$C,$C
343 $ST $B,`1*$SZ`($ctx)
344 addl @X[3],$D,$D
345 $ST $C,`2*$SZ`($ctx)
346 addl @X[4],$E,$E
347 $ST $D,`3*$SZ`($ctx)
348 addl @X[5],$F,$F
349 $ST $E,`4*$SZ`($ctx)
350 addl @X[6],$G,$G
351 $ST $F,`5*$SZ`($ctx)
352 addl @X[7],$H,$H
353 $ST $G,`6*$SZ`($ctx)
354 $ST $H,`7*$SZ`($ctx)
355
356 cmpb,*<>,n $inp,$num,L\$oop
357 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
358___
359if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
360{{
361$code.=<<___;
362 b L\$done
363 nop
364
365 .ALIGN 64
366L\$parisc1
367___
368$code.=<<___ if ($SZ==8 && $SIZE_T==4);
369#endif
370___
371
372@V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
373 $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
374 ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
375 "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
376$a0 ="%r17";
377$a1 ="%r18";
378$a2 ="%r19";
379$a3 ="%r20";
380$t0 ="%r21";
381$t1 ="%r22";
382$t2 ="%r28";
383$t3 ="%r29";
384$Tbl="%r31";
385
386@X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
387
388sub ROUND_00_15_pa1 {
389my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
390 $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
391my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
392
393$code.=<<___ if (!$flag);
394 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
395 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
396___
397$code.=<<___;
398 shd $ehi,$elo,$Sigma1[0],$t0
399 add $Xlo,$hlo,$hlo
400 shd $elo,$ehi,$Sigma1[0],$t1
401 addc $Xhi,$hhi,$hhi ; h += X[i]
402 shd $ehi,$elo,$Sigma1[1],$t2
403 ldwm 8($Tbl),$Xhi
404 shd $elo,$ehi,$Sigma1[1],$t3
405 ldw -4($Tbl),$Xlo ; load K[i]
406 xor $t2,$t0,$t0
407 xor $t3,$t1,$t1
408 and $flo,$elo,$a0
409 and $fhi,$ehi,$a1
410 shd $ehi,$elo,$Sigma1[2],$t2
411 andcm $glo,$elo,$a2
412 shd $elo,$ehi,$Sigma1[2],$t3
413 andcm $ghi,$ehi,$a3
414 xor $t2,$t0,$t0
415 xor $t3,$t1,$t1 ; Sigma1(e)
416 add $Xlo,$hlo,$hlo
417 xor $a2,$a0,$a0
418 addc $Xhi,$hhi,$hhi ; h += K[i]
419 xor $a3,$a1,$a1 ; Ch(e,f,g)
420
421 add $t0,$hlo,$hlo
422 shd $ahi,$alo,$Sigma0[0],$t0
423 addc $t1,$hhi,$hhi ; h += Sigma1(e)
424 shd $alo,$ahi,$Sigma0[0],$t1
425 add $a0,$hlo,$hlo
426 shd $ahi,$alo,$Sigma0[1],$t2
427 addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
428 shd $alo,$ahi,$Sigma0[1],$t3
429
430 xor $t2,$t0,$t0
431 xor $t3,$t1,$t1
432 shd $ahi,$alo,$Sigma0[2],$t2
433 and $alo,$blo,$a0
434 shd $alo,$ahi,$Sigma0[2],$t3
435 and $ahi,$bhi,$a1
436 xor $t2,$t0,$t0
437 xor $t3,$t1,$t1 ; Sigma0(a)
438
439 and $alo,$clo,$a2
440 and $ahi,$chi,$a3
441 xor $a2,$a0,$a0
442 add $hlo,$dlo,$dlo
443 xor $a3,$a1,$a1
444 addc $hhi,$dhi,$dhi ; d += h
445 and $blo,$clo,$a2
446 add $t0,$hlo,$hlo
447 and $bhi,$chi,$a3
448 addc $t1,$hhi,$hhi ; h += Sigma0(a)
449 xor $a2,$a0,$a0
450 add $a0,$hlo,$hlo
451 xor $a3,$a1,$a1 ; Maj(a,b,c)
452 addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
453
454___
455$code.=<<___ if ($i==15 && $flag);
456 extru $Xlo,31,10,$Xlo
457 comiclr,= $LAST10BITS,$Xlo,%r0
458 b L\$rounds_pa1
459 nop
460___
461push(@X,shift(@X)); push(@X,shift(@X));
462}
463
464sub ROUND_16_xx_pa1 {
465my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
466my ($i)=shift;
467$i-=16;
468$code.=<<___;
469 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
470 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
471 ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
472 ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
473 ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
474 ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
475 shd $Xnhi,$Xnlo,$sigma0[0],$t0
476 shd $Xnlo,$Xnhi,$sigma0[0],$t1
477 add $a0,$Xlo,$Xlo
478 shd $Xnhi,$Xnlo,$sigma0[1],$t2
479 addc $a1,$Xhi,$Xhi
480 shd $Xnlo,$Xnhi,$sigma0[1],$t3
481 xor $t2,$t0,$t0
482 shd $Xnhi,$Xnlo,$sigma0[2],$t2
483 xor $t3,$t1,$t1
484 extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
485 xor $t2,$t0,$t0
486 shd $a3,$a2,$sigma1[0],$a0
487 xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f])
488 shd $a2,$a3,$sigma1[0],$a1
489 add $t0,$Xlo,$Xlo
490 shd $a3,$a2,$sigma1[1],$t2
491 addc $t1,$Xhi,$Xhi
492 shd $a2,$a3,$sigma1[1],$t3
493 xor $t2,$a0,$a0
494 shd $a3,$a2,$sigma1[2],$t2
495 xor $t3,$a1,$a1
496 extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
497 xor $t2,$a0,$a0
498 xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f])
499 add $a0,$Xlo,$Xlo
500 addc $a1,$Xhi,$Xhi
501
502 stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
503 stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
504___
505&ROUND_00_15_pa1($i,@_,1);
506}
507$code.=<<___;
508 ldw `0*4`($ctx),$Ahi ; load context
509 ldw `1*4`($ctx),$Alo
510 ldw `2*4`($ctx),$Bhi
511 ldw `3*4`($ctx),$Blo
512 ldw `4*4`($ctx),$Chi
513 ldw `5*4`($ctx),$Clo
514 ldw `6*4`($ctx),$Dhi
515 ldw `7*4`($ctx),$Dlo
516 ldw `8*4`($ctx),$Ehi
517 ldw `9*4`($ctx),$Elo
518 ldw `10*4`($ctx),$Fhi
519 ldw `11*4`($ctx),$Flo
520 ldw `12*4`($ctx),$Ghi
521 ldw `13*4`($ctx),$Glo
522 ldw `14*4`($ctx),$Hhi
523 ldw `15*4`($ctx),$Hlo
524
525 extru $inp,31,2,$t0
526 sh3addl $t0,%r0,$t0
527 subi 32,$t0,$t0
528 mtctl $t0,%cr11 ; load %sar with align factor
529
530L\$oop_pa1
531 extru $inp,31,2,$a3
532 comib,= 0,$a3,L\$aligned_pa1
533 sub $inp,$a3,$inp
534
535 ldw `0*4`($inp),$X[0]
536 ldw `1*4`($inp),$X[1]
537 ldw `2*4`($inp),$t2
538 ldw `3*4`($inp),$t3
539 ldw `4*4`($inp),$a0
540 ldw `5*4`($inp),$a1
541 ldw `6*4`($inp),$a2
542 ldw `7*4`($inp),$a3
543 vshd $X[0],$X[1],$X[0]
544 vshd $X[1],$t2,$X[1]
545 stw $X[0],`-$XOFF+0*4`(%sp)
546 ldw `8*4`($inp),$t0
547 vshd $t2,$t3,$t2
548 stw $X[1],`-$XOFF+1*4`(%sp)
549 ldw `9*4`($inp),$t1
550 vshd $t3,$a0,$t3
551___
552{
553my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
554for ($i=2;$i<=(128/4-8);$i++) {
555$code.=<<___;
556 stw $t[0],`-$XOFF+$i*4`(%sp)
557 ldw `(8+$i)*4`($inp),$t[0]
558 vshd $t[1],$t[2],$t[1]
559___
560push(@t,shift(@t));
561}
562for (;$i<(128/4-1);$i++) {
563$code.=<<___;
564 stw $t[0],`-$XOFF+$i*4`(%sp)
565 vshd $t[1],$t[2],$t[1]
566___
567push(@t,shift(@t));
568}
569$code.=<<___;
570 b L\$collected_pa1
571 stw $t[0],`-$XOFF+$i*4`(%sp)
572
573___
574}
575$code.=<<___;
576L\$aligned_pa1
577 ldw `0*4`($inp),$X[0]
578 ldw `1*4`($inp),$X[1]
579 ldw `2*4`($inp),$t2
580 ldw `3*4`($inp),$t3
581 ldw `4*4`($inp),$a0
582 ldw `5*4`($inp),$a1
583 ldw `6*4`($inp),$a2
584 ldw `7*4`($inp),$a3
585 stw $X[0],`-$XOFF+0*4`(%sp)
586 ldw `8*4`($inp),$t0
587 stw $X[1],`-$XOFF+1*4`(%sp)
588 ldw `9*4`($inp),$t1
589___
590{
591my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
592for ($i=2;$i<(128/4-8);$i++) {
593$code.=<<___;
594 stw $t[0],`-$XOFF+$i*4`(%sp)
595 ldw `(8+$i)*4`($inp),$t[0]
596___
597push(@t,shift(@t));
598}
599for (;$i<128/4;$i++) {
600$code.=<<___;
601 stw $t[0],`-$XOFF+$i*4`(%sp)
602___
603push(@t,shift(@t));
604}
605$code.="L\$collected_pa1\n";
606}
607
608for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
609$code.="L\$rounds_pa1\n";
610for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
611
612$code.=<<___;
613 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
614 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
615 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
616 ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
617
618 ldw `0*4`($ctx),$t1 ; update context
619 ldw `1*4`($ctx),$t0
620 ldw `2*4`($ctx),$t3
621 ldw `3*4`($ctx),$t2
622 ldw `4*4`($ctx),$a1
623 ldw `5*4`($ctx),$a0
624 ldw `6*4`($ctx),$a3
625 add $t0,$Alo,$Alo
626 ldw `7*4`($ctx),$a2
627 addc $t1,$Ahi,$Ahi
628 ldw `8*4`($ctx),$t1
629 add $t2,$Blo,$Blo
630 ldw `9*4`($ctx),$t0
631 addc $t3,$Bhi,$Bhi
632 ldw `10*4`($ctx),$t3
633 add $a0,$Clo,$Clo
634 ldw `11*4`($ctx),$t2
635 addc $a1,$Chi,$Chi
636 ldw `12*4`($ctx),$a1
637 add $a2,$Dlo,$Dlo
638 ldw `13*4`($ctx),$a0
639 addc $a3,$Dhi,$Dhi
640 ldw `14*4`($ctx),$a3
641 add $t0,$Elo,$Elo
642 ldw `15*4`($ctx),$a2
643 addc $t1,$Ehi,$Ehi
644 stw $Ahi,`0*4`($ctx)
645 add $t2,$Flo,$Flo
646 stw $Alo,`1*4`($ctx)
647 addc $t3,$Fhi,$Fhi
648 stw $Bhi,`2*4`($ctx)
649 add $a0,$Glo,$Glo
650 stw $Blo,`3*4`($ctx)
651 addc $a1,$Ghi,$Ghi
652 stw $Chi,`4*4`($ctx)
653 add $a2,$Hlo,$Hlo
654 stw $Clo,`5*4`($ctx)
655 addc $a3,$Hhi,$Hhi
656 stw $Dhi,`6*4`($ctx)
657 ldo `16*$SZ`($inp),$inp ; advance $inp
658 stw $Dlo,`7*4`($ctx)
659 stw $Ehi,`8*4`($ctx)
660 stw $Elo,`9*4`($ctx)
661 stw $Fhi,`10*4`($ctx)
662 stw $Flo,`11*4`($ctx)
663 stw $Ghi,`12*4`($ctx)
664 stw $Glo,`13*4`($ctx)
665 stw $Hhi,`14*4`($ctx)
666 comb,= $inp,$num,L\$done
667 stw $Hlo,`15*4`($ctx)
668 b L\$oop_pa1
669 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
670L\$done
671___
672}}
673$code.=<<___;
674 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
675 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
676 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
677 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
678 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
679 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
680 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
681 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
682 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
683 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
684 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
685 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
686 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
687 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
688 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
689 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
690 bv (%r2)
691 .EXIT
692 $POPMB -$FRAME(%sp),%r3
693 .PROCEND
694___
695
696# Explicitly encode PA-RISC 2.0 instructions used in this module, so
697# that it can be compiled with .LEVEL 1.0. It should be noted that I
698# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
699# directive...
700
701my $ldd = sub {
702 my ($mod,$args) = @_;
703 my $orig = "ldd$mod\t$args";
704
705 if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
706 { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1);
707 $opcode|=(1<<3) if ($mod =~ /^,m/);
708 $opcode|=(1<<2) if ($mod =~ /^,mb/);
709 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
710 }
711 else { "\t".$orig; }
712};
713
714my $std = sub {
715 my ($mod,$args) = @_;
716 my $orig = "std$mod\t$args";
717
718 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
719 { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
720 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
721 }
722 else { "\t".$orig; }
723};
724
725my $extrd = sub {
726 my ($mod,$args) = @_;
727 my $orig = "extrd$mod\t$args";
728
729 # I only have ",u" completer, it's implicitly encoded...
730 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
731 { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
732 my $len=32-$3;
733 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
734 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
735 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
736 }
737 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
738 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
739 my $len=32-$2;
740 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
741 $opcode |= (1<<13) if ($mod =~ /,\**=/);
742 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
743 }
744 else { "\t".$orig; }
745};
746
747my $shrpd = sub {
748 my ($mod,$args) = @_;
749 my $orig = "shrpd$mod\t$args";
750
751 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
752 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
753 my $cpos=63-$3;
754 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
755 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
756 }
757 elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
758 { sprintf "\t.WORD\t0x%08x\t; %s",
759 (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
760 }
761 else { "\t".$orig; }
762};
763
764sub assemble {
765 my ($mnemonic,$mod,$args)=@_;
766 my $opcode = eval("\$$mnemonic");
767
768 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
769}
770
771foreach (split("\n",$code)) {
772 s/\`([^\`]*)\`/eval $1/ge;
773
774 s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
775 $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32
776 : sprintf("shd\t%$1,%$2,%d",$3)/e or
777 # translate made up instructons: _ror, _shr, _align, _shl
778 s/_ror(\s+)(%r[0-9]+),/
779 ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or
780
781 s/_shr(\s+%r[0-9]+),([0-9]+),/
782 $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
783 : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or
784
785 s/_align(\s+%r[0-9]+,%r[0-9]+),/
786 ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or
787
788 s/_shl(\s+%r[0-9]+),([0-9]+),/
789 $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
790 : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;
791
792 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);
793
794 s/cmpb,\*/comb,/ if ($SIZE_T==4);
795
796 s/\bbv\b/bve/ if ($SIZE_T==8);
797
798 print $_,"\n";
799}
800
801close STDOUT;