summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/sha/asm/sha512-parisc.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/sha/asm/sha512-parisc.pl')
-rwxr-xr-xsrc/lib/libcrypto/sha/asm/sha512-parisc.pl805
1 files changed, 0 insertions, 805 deletions
diff --git a/src/lib/libcrypto/sha/asm/sha512-parisc.pl b/src/lib/libcrypto/sha/asm/sha512-parisc.pl
deleted file mode 100755
index 4af7731661..0000000000
--- a/src/lib/libcrypto/sha/asm/sha512-parisc.pl
+++ /dev/null
@@ -1,805 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA256/512 block procedure for PA-RISC.
11
12# June 2009.
13#
14# SHA256 performance is >75% better than gcc 3.2 generated code on
15# PA-7100LC. Compared to code generated by vendor compiler this
16# implementation is almost 70% faster in 64-bit build, but delivers
17# virtually same performance in 32-bit build on PA-8600.
18#
19# SHA512 performance is >2.9x better than gcc 3.2 generated code on
20# PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the
21# code is executed on PA-RISC 2.0 processor and switches to 64-bit
22# code path delivering adequate performance even in "blended" 32-bit
23# build. Though 64-bit code is not any faster than code generated by
24# vendor compiler on PA-8600...
25#
26# Special thanks to polarhome.com for providing HP-UX account.
27
# Command line: <flavour> <output>.
#   $flavour - matched against /64/ below to choose the 64-bit parameters.
#   $output  - destination file; its name is also matched against /512/
#              below to choose between the SHA-512 and SHA-256 variants.
$flavour = shift;
$output  = shift;

# Send the generated assembly to $output.  Three-argument open keeps the
# file name from being parsed for mode characters, and a failed open is
# fatal instead of silently discarding everything printed afterwards.
# Without an output argument the inherited STDOUT is left untouched.
if (defined $output && length $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
31
# ABI-dependent parameters, chosen by whether $flavour contains "64":
#   $LEVEL         argument of the .LEVEL directive
#   $SIZE_T        size in bytes of one saved-register slot
#   $FRAME_MARKER  size of the frame marker
#   $SAVED_RP      offset below %sp at which %r2 is saved in the prologue
#   $PUSH/$PUSHMA  store mnemonics (plain / with base modification)
#   $POP/$POPMB    load mnemonics  (plain / with base modification)
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $PUSH, $PUSHMA, $POP, $POPMB) =
    ($flavour =~ /64/)
        ? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
        : ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");
51
# Algorithm-dependent parameters, chosen by whether the output file name
# contains "512":
#   $func        exported symbol name
#   $SZ          size in bytes of one working word
#   $rounds      number of rounds (and of table entries)
#   $LAST10BITS  low 10 bits of the last table entry, used to detect the
#                end of the table
#   $LD/$LDM/$ST load, load-and-modify and store mnemonics for $SZ-sized data
#   @Sigma0/@Sigma1/@sigma0/@sigma1  rotate/shift amounts
if ($output =~ /512/) {
    ($func, $SZ, $rounds) = ("sha512_block_data_order", 8, 80);
    ($LD, $LDM, $ST)      = ("ldd", "ldd,ma", "std");
    $LAST10BITS           = 0x017;
    @Sigma0 = (28, 34, 39);
    @Sigma1 = (14, 18, 41);
    @sigma0 = ( 1,  8,  7);
    @sigma1 = (19, 61,  6);
}
else {
    ($func, $SZ, $rounds) = ("sha256_block_data_order", 4, 64);
    ($LD, $LDM, $ST)      = ("ldw", "ldwm", "stw");
    $LAST10BITS           = 0x0f2;
    @Sigma0 = ( 2, 13, 22);
    @Sigma1 = ( 6, 11, 25);
    @sigma0 = ( 7, 18,  3);
    @sigma1 = (17, 19, 10);
}
77
# Stack frame layout.
$XOFF   = 16*$SZ + 32;                          # local variables
$FRAME  = 16*$SIZE_T + $FRAME_MARKER + $XOFF;   # 16 saved regs + frame marker
                                                # [+ argument transfer] + locals
$XOFF  += $FRAME_MARKER;                        # distance between %sp and local variables

# Argument registers.  Each one is reused as a scratch register below:
# $ctx is zapped by $a0, $inp by $a1 and $num by $t0.
($ctx, $inp, $num) = ("%r26", "%r25", "%r24");

# Scratch registers and the constant-table pointer.
($a0, $a1, $t0, $t1, $Tbl) = ("%r26", "%r25", "%r24", "%r29", "%r31");

# Working variables a..h.
@V = ($A, $B, $C, $D, $E, $F, $G, $H) = ((map { "%r$_" } 17 .. 23), "%r28");

# Message schedule registers; the 17th entry aliases $inp.
@X = ((map { "%r$_" } 1 .. 16), $inp);
98
# ROUND_00_15($i, $a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends to $code the assembly for one round operating on full-width
# working registers.  $i is the round index; $a..$h are the register names
# currently playing the roles a..h (the caller rotates @V between calls).
#
# The emitted code performs
#     h += $t1 + X[i%16] + Ch(e,f,g) + Sigma1(e);  d += h;
#     h += Sigma0(a) + Maj(a,b,c)
# where $t1 is expected to hold the table entry most recently loaded from
# $Tbl.  $a0, $a1, $t0 and $t1 are used as scratch.
#
# For $i < 15 the next table entry is loaded into $t1 inside the round;
# for later rounds that load is emitted by ROUND_16_xx instead.
#
# `_ror` is a made-up mnemonic and the back-ticked expressions are Perl;
# both are resolved by the post-processing loop at the end of the script.
99sub ROUND_00_15 {
100my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
101$code.=<<___;
102 _ror $e,$Sigma1[0],$a0
103 and $f,$e,$t0
104 _ror $e,$Sigma1[1],$a1
105 addl $t1,$h,$h
106 andcm $g,$e,$t1
107 xor $a1,$a0,$a0
108 _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
109 or $t0,$t1,$t1 ; Ch(e,f,g)
110 addl @X[$i%16],$h,$h
111 xor $a0,$a1,$a1 ; Sigma1(e)
112 addl $t1,$h,$h
113 _ror $a,$Sigma0[0],$a0
114 addl $a1,$h,$h
115
116 _ror $a,$Sigma0[1],$a1
117 and $a,$b,$t0
118 and $a,$c,$t1
119 xor $a1,$a0,$a0
120 _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
121 xor $t1,$t0,$t0
122 and $b,$c,$t1
123 xor $a0,$a1,$a1 ; Sigma0(a)
124 addl $h,$d,$d
125 xor $t1,$t0,$t0 ; Maj(a,b,c)
126 `"$LDM $SZ($Tbl),$t1" if ($i<15)`
127 addl $a1,$h,$h
128 addl $t0,$h,$h
129
130___
131}
132
# ROUND_16_xx($i, $a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends to $code the assembly for one round with message-schedule
# expansion.  $i is reduced by 16 first, so all @X indices are taken
# modulo 16.  The emitted code computes
#     X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])
# and loads the next table entry from $Tbl into $t1.
#
# When the reduced index is 15, extra code compares the low 10 bits of the
# entry just loaded with $LAST10BITS and contains an instruction that adds
# 1 to $Tbl ("signal end of $Tbl"); the round loop tests that bit to decide
# whether to iterate again.
#
# Finally the round itself is emitted through ROUND_00_15 with the
# original (un-reduced) index, so that routine does not load $t1 again.
133sub ROUND_16_xx {
134my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
135$i-=16;
136$code.=<<___;
137 _ror @X[($i+1)%16],$sigma0[0],$a0
138 _ror @X[($i+1)%16],$sigma0[1],$a1
139 addl @X[($i+9)%16],@X[$i],@X[$i]
140 _ror @X[($i+14)%16],$sigma1[0],$t0
141 _ror @X[($i+14)%16],$sigma1[1],$t1
142 xor $a1,$a0,$a0
143 _shr @X[($i+1)%16],$sigma0[2],$a1
144 xor $t1,$t0,$t0
145 _shr @X[($i+14)%16],$sigma1[2],$t1
146 xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
147 xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
148 $LDM $SZ($Tbl),$t1
149 addl $a0,@X[$i],@X[$i]
150 addl $t0,@X[$i],@X[$i]
151___
152$code.=<<___ if ($i==15);
153 extru $t1,31,10,$a1
154 comiclr,<> $LAST10BITS,$a1,%r0
155 ldo 1($Tbl),$Tbl ; signal end of $Tbl
156___
157&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
158}
159
160$code=<<___;
161 .LEVEL $LEVEL
162#if 0
163 .SPACE \$TEXT\$
164 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
165#else
166 .text
167#endif
168
169 .ALIGN 64
170L\$table
171___
172$code.=<<___ if ($SZ==8);
173 .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
174 .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
175 .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
176 .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
177 .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
178 .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
179 .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
180 .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
181 .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
182 .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
183 .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
184 .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
185 .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
186 .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
187 .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
188 .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
189 .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
190 .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
191 .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
192 .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
193 .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
194 .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
195 .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
196 .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
197 .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
198 .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
199 .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
200 .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
201 .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
202 .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
203 .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
204 .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
205 .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
206 .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
207 .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
208 .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
209 .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
210 .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
211 .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
212 .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
213___
214$code.=<<___ if ($SZ==4);
215 .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
216 .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
217 .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
218 .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
219 .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
220 .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
221 .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
222 .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
223 .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
224 .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
225 .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
226 .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
227 .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
228 .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
229 .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
230 .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
231___
232$code.=<<___;
233
234 .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
235 .ALIGN 64
236$func
237 .PROC
238 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
239 .ENTRY
240 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
241 $PUSHMA %r3,$FRAME(%sp)
242 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
243 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
244 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
245 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
246 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
247 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
248 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
249 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
250 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
251 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
252 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
253 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
254 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
255 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
256 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
257
258 _shl $num,`log(16*$SZ)/log(2)`,$num
259 addl $inp,$num,$num ; $num to point at the end of $inp
260
261 $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
262 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
263 $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
264
265 blr %r0,$Tbl
266 ldi 3,$t1
267L\$pic
268 andcm $Tbl,$t1,$Tbl ; wipe privilege level
269 ldo L\$table-L\$pic($Tbl),$Tbl
270___
271$code.=<<___ if ($SZ==8 && $SIZE_T==4);
272#ifndef __OpenBSD__
273___
274$code.=<<___ if ($SZ==8 && $SIZE_T==4);
275 ldi 31,$t1
276 mtctl $t1,%cr11
277 extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
278 b L\$parisc1
279 nop
280___
281$code.=<<___;
282 $LD `0*$SZ`($ctx),$A ; load context
283 $LD `1*$SZ`($ctx),$B
284 $LD `2*$SZ`($ctx),$C
285 $LD `3*$SZ`($ctx),$D
286 $LD `4*$SZ`($ctx),$E
287 $LD `5*$SZ`($ctx),$F
288 $LD `6*$SZ`($ctx),$G
289 $LD `7*$SZ`($ctx),$H
290
291 extru $inp,31,`log($SZ)/log(2)`,$t0
292 sh3addl $t0,%r0,$t0
293 subi `8*$SZ`,$t0,$t0
294 mtctl $t0,%cr11 ; load %sar with align factor
295
296L\$oop
297 ldi `$SZ-1`,$t0
298 $LDM $SZ($Tbl),$t1
299 andcm $inp,$t0,$t0 ; align $inp
300___
301 for ($i=0;$i<15;$i++) { # load input block
302 $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
303$code.=<<___;
304 cmpb,*= $inp,$t0,L\$aligned
305 $LD `$SZ*15`($t0),@X[15]
306 $LD `$SZ*16`($t0),@X[16]
307___
308 for ($i=0;$i<16;$i++) { # align data
309 $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
310$code.=<<___;
311L\$aligned
312 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
313___
314
315for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
316$code.=<<___;
317L\$rounds
318 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
319___
320for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
321$code.=<<___;
322 bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
323 nop
324
325 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
326 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
327 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
328 ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
329
330 $LD `0*$SZ`($ctx),@X[0] ; load context
331 $LD `1*$SZ`($ctx),@X[1]
332 $LD `2*$SZ`($ctx),@X[2]
333 $LD `3*$SZ`($ctx),@X[3]
334 $LD `4*$SZ`($ctx),@X[4]
335 $LD `5*$SZ`($ctx),@X[5]
336 addl @X[0],$A,$A
337 $LD `6*$SZ`($ctx),@X[6]
338 addl @X[1],$B,$B
339 $LD `7*$SZ`($ctx),@X[7]
340 ldo `16*$SZ`($inp),$inp ; advance $inp
341
342 $ST $A,`0*$SZ`($ctx) ; save context
343 addl @X[2],$C,$C
344 $ST $B,`1*$SZ`($ctx)
345 addl @X[3],$D,$D
346 $ST $C,`2*$SZ`($ctx)
347 addl @X[4],$E,$E
348 $ST $D,`3*$SZ`($ctx)
349 addl @X[5],$F,$F
350 $ST $E,`4*$SZ`($ctx)
351 addl @X[6],$G,$G
352 $ST $F,`5*$SZ`($ctx)
353 addl @X[7],$H,$H
354 $ST $G,`6*$SZ`($ctx)
355 $ST $H,`7*$SZ`($ctx)
356
357 cmpb,*<>,n $inp,$num,L\$oop
358 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
359___
360if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
361{{
362$code.=<<___;
363 b L\$done
364 nop
365
366 .ALIGN 64
367L\$parisc1
368___
369$code.=<<___ if ($SZ==8 && $SIZE_T==4);
370#endif
371___
372
373@V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
374 $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
375 ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
376 "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
377$a0 ="%r17";
378$a1 ="%r18";
379$a2 ="%r19";
380$a3 ="%r20";
381$t0 ="%r21";
382$t1 ="%r22";
383$t2 ="%r28";
384$t3 ="%r29";
385$Tbl="%r31";
386
387@X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
388
# ROUND_00_15_pa1($i, $ahi,$alo, ..., $hhi,$hlo, $flag)
#
# Appends to $code the assembly for one SHA-512 round in which every
# 64-bit quantity is held as a hi/lo pair of 32-bit registers; additions
# are add/addc pairs and rotations are pairs of shd instructions.
#
#   $i     round index (callers pass a value in 0..15)
#   $ahi..$hlo  registers currently holding a..h as hi,lo pairs
#   $flag  when false, code is emitted first to load X[i+1] from the stack
#          frame into the "next" pair of @X; ROUND_16_xx_pa1 passes a true
#          value because it has already loaded that word itself
#
# The current schedule word is taken from the first two entries of @X.
# After it has been added to h, the same two registers are reloaded with
# K[i] from $Tbl.  On exit @X is rotated by two entries so that the pair
# loaded as X[i+1] becomes the current pair for the following round.
#
# When $i is 15 and $flag is set, a test of the low 10 bits of the K[i]
# low word against $LAST10BITS is emitted, followed by a branch back to
# L$rounds_pa1.
#
# shd amounts above 31 are legalised by the post-processing loop at the
# end of the script.
389sub ROUND_00_15_pa1 {
390my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
391 $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
392my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
393
394$code.=<<___ if (!$flag);
395 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
396 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
397___
398$code.=<<___;
399 shd $ehi,$elo,$Sigma1[0],$t0
400 add $Xlo,$hlo,$hlo
401 shd $elo,$ehi,$Sigma1[0],$t1
402 addc $Xhi,$hhi,$hhi ; h += X[i]
403 shd $ehi,$elo,$Sigma1[1],$t2
404 ldwm 8($Tbl),$Xhi
405 shd $elo,$ehi,$Sigma1[1],$t3
406 ldw -4($Tbl),$Xlo ; load K[i]
407 xor $t2,$t0,$t0
408 xor $t3,$t1,$t1
409 and $flo,$elo,$a0
410 and $fhi,$ehi,$a1
411 shd $ehi,$elo,$Sigma1[2],$t2
412 andcm $glo,$elo,$a2
413 shd $elo,$ehi,$Sigma1[2],$t3
414 andcm $ghi,$ehi,$a3
415 xor $t2,$t0,$t0
416 xor $t3,$t1,$t1 ; Sigma1(e)
417 add $Xlo,$hlo,$hlo
418 xor $a2,$a0,$a0
419 addc $Xhi,$hhi,$hhi ; h += K[i]
420 xor $a3,$a1,$a1 ; Ch(e,f,g)
421
422 add $t0,$hlo,$hlo
423 shd $ahi,$alo,$Sigma0[0],$t0
424 addc $t1,$hhi,$hhi ; h += Sigma1(e)
425 shd $alo,$ahi,$Sigma0[0],$t1
426 add $a0,$hlo,$hlo
427 shd $ahi,$alo,$Sigma0[1],$t2
428 addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
429 shd $alo,$ahi,$Sigma0[1],$t3
430
431 xor $t2,$t0,$t0
432 xor $t3,$t1,$t1
433 shd $ahi,$alo,$Sigma0[2],$t2
434 and $alo,$blo,$a0
435 shd $alo,$ahi,$Sigma0[2],$t3
436 and $ahi,$bhi,$a1
437 xor $t2,$t0,$t0
438 xor $t3,$t1,$t1 ; Sigma0(a)
439
440 and $alo,$clo,$a2
441 and $ahi,$chi,$a3
442 xor $a2,$a0,$a0
443 add $hlo,$dlo,$dlo
444 xor $a3,$a1,$a1
445 addc $hhi,$dhi,$dhi ; d += h
446 and $blo,$clo,$a2
447 add $t0,$hlo,$hlo
448 and $bhi,$chi,$a3
449 addc $t1,$hhi,$hhi ; h += Sigma0(a)
450 xor $a2,$a0,$a0
451 add $a0,$hlo,$hlo
452 xor $a3,$a1,$a1 ; Maj(a,b,c)
453 addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
454
455___
456$code.=<<___ if ($i==15 && $flag);
457 extru $Xlo,31,10,$Xlo
458 comiclr,= $LAST10BITS,$Xlo,%r0
459 b L\$rounds_pa1
460 nop
461___
462push(@X,shift(@X)); push(@X,shift(@X));
463}
464
# ROUND_16_xx_pa1($i, $ahi,$alo, ..., $hhi,$hlo)
#
# Appends to $code the assembly for one SHA-512 round with message-schedule
# expansion, using hi/lo pairs of 32-bit registers.  The schedule lives in
# the stack frame at -$XOFF(%sp), 8 bytes per word, hi half first.
#
# $i is shifted off @_ and reduced by 16; what is left in @_ (the working
# registers) is forwarded unchanged to ROUND_00_15_pa1 together with a true
# flag, which tells that routine not to load X[i+1] again.
#
# The emitted code loads X[i+1], X[i+9] and X[i+14] from the frame, computes
#     X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])
# in the current pair of @X and stores the result back to the frame.
#
# NOTE: the second sigma comment in the emitted assembly reads "sigma0",
# but that sequence uses the @sigma1 amounts on X[i+14], i.e. it is sigma1.
465sub ROUND_16_xx_pa1 {
466my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
467my ($i)=shift;
468$i-=16;
469$code.=<<___;
470 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
471 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
472 ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
473 ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
474 ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
475 ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
476 shd $Xnhi,$Xnlo,$sigma0[0],$t0
477 shd $Xnlo,$Xnhi,$sigma0[0],$t1
478 add $a0,$Xlo,$Xlo
479 shd $Xnhi,$Xnlo,$sigma0[1],$t2
480 addc $a1,$Xhi,$Xhi
481 shd $Xnlo,$Xnhi,$sigma0[1],$t3
482 xor $t2,$t0,$t0
483 shd $Xnhi,$Xnlo,$sigma0[2],$t2
484 xor $t3,$t1,$t1
485 extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
486 xor $t2,$t0,$t0
487 shd $a3,$a2,$sigma1[0],$a0
488 xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f])
489 shd $a2,$a3,$sigma1[0],$a1
490 add $t0,$Xlo,$Xlo
491 shd $a3,$a2,$sigma1[1],$t2
492 addc $t1,$Xhi,$Xhi
493 shd $a2,$a3,$sigma1[1],$t3
494 xor $t2,$a0,$a0
495 shd $a3,$a2,$sigma1[2],$t2
496 xor $t3,$a1,$a1
497 extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
498 xor $t2,$a0,$a0
499 xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f])
500 add $a0,$Xlo,$Xlo
501 addc $a1,$Xhi,$Xhi
502
503 stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
504 stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
505___
506&ROUND_00_15_pa1($i,@_,1);
507}
508$code.=<<___;
509 ldw `0*4`($ctx),$Ahi ; load context
510 ldw `1*4`($ctx),$Alo
511 ldw `2*4`($ctx),$Bhi
512 ldw `3*4`($ctx),$Blo
513 ldw `4*4`($ctx),$Chi
514 ldw `5*4`($ctx),$Clo
515 ldw `6*4`($ctx),$Dhi
516 ldw `7*4`($ctx),$Dlo
517 ldw `8*4`($ctx),$Ehi
518 ldw `9*4`($ctx),$Elo
519 ldw `10*4`($ctx),$Fhi
520 ldw `11*4`($ctx),$Flo
521 ldw `12*4`($ctx),$Ghi
522 ldw `13*4`($ctx),$Glo
523 ldw `14*4`($ctx),$Hhi
524 ldw `15*4`($ctx),$Hlo
525
526 extru $inp,31,2,$t0
527 sh3addl $t0,%r0,$t0
528 subi 32,$t0,$t0
529 mtctl $t0,%cr11 ; load %sar with align factor
530
531L\$oop_pa1
532 extru $inp,31,2,$a3
533 comib,= 0,$a3,L\$aligned_pa1
534 sub $inp,$a3,$inp
535
536 ldw `0*4`($inp),$X[0]
537 ldw `1*4`($inp),$X[1]
538 ldw `2*4`($inp),$t2
539 ldw `3*4`($inp),$t3
540 ldw `4*4`($inp),$a0
541 ldw `5*4`($inp),$a1
542 ldw `6*4`($inp),$a2
543 ldw `7*4`($inp),$a3
544 vshd $X[0],$X[1],$X[0]
545 vshd $X[1],$t2,$X[1]
546 stw $X[0],`-$XOFF+0*4`(%sp)
547 ldw `8*4`($inp),$t0
548 vshd $t2,$t3,$t2
549 stw $X[1],`-$XOFF+1*4`(%sp)
550 ldw `9*4`($inp),$t1
551 vshd $t3,$a0,$t3
552___
553{
554my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
555for ($i=2;$i<=(128/4-8);$i++) {
556$code.=<<___;
557 stw $t[0],`-$XOFF+$i*4`(%sp)
558 ldw `(8+$i)*4`($inp),$t[0]
559 vshd $t[1],$t[2],$t[1]
560___
561push(@t,shift(@t));
562}
563for (;$i<(128/4-1);$i++) {
564$code.=<<___;
565 stw $t[0],`-$XOFF+$i*4`(%sp)
566 vshd $t[1],$t[2],$t[1]
567___
568push(@t,shift(@t));
569}
570$code.=<<___;
571 b L\$collected_pa1
572 stw $t[0],`-$XOFF+$i*4`(%sp)
573
574___
575}
576$code.=<<___;
577L\$aligned_pa1
578 ldw `0*4`($inp),$X[0]
579 ldw `1*4`($inp),$X[1]
580 ldw `2*4`($inp),$t2
581 ldw `3*4`($inp),$t3
582 ldw `4*4`($inp),$a0
583 ldw `5*4`($inp),$a1
584 ldw `6*4`($inp),$a2
585 ldw `7*4`($inp),$a3
586 stw $X[0],`-$XOFF+0*4`(%sp)
587 ldw `8*4`($inp),$t0
588 stw $X[1],`-$XOFF+1*4`(%sp)
589 ldw `9*4`($inp),$t1
590___
591{
592my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
593for ($i=2;$i<(128/4-8);$i++) {
594$code.=<<___;
595 stw $t[0],`-$XOFF+$i*4`(%sp)
596 ldw `(8+$i)*4`($inp),$t[0]
597___
598push(@t,shift(@t));
599}
600for (;$i<128/4;$i++) {
601$code.=<<___;
602 stw $t[0],`-$XOFF+$i*4`(%sp)
603___
604push(@t,shift(@t));
605}
606$code.="L\$collected_pa1\n";
607}
608
609for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
610$code.="L\$rounds_pa1\n";
611for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
612
613$code.=<<___;
614 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
615 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
616 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
617 ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
618
619 ldw `0*4`($ctx),$t1 ; update context
620 ldw `1*4`($ctx),$t0
621 ldw `2*4`($ctx),$t3
622 ldw `3*4`($ctx),$t2
623 ldw `4*4`($ctx),$a1
624 ldw `5*4`($ctx),$a0
625 ldw `6*4`($ctx),$a3
626 add $t0,$Alo,$Alo
627 ldw `7*4`($ctx),$a2
628 addc $t1,$Ahi,$Ahi
629 ldw `8*4`($ctx),$t1
630 add $t2,$Blo,$Blo
631 ldw `9*4`($ctx),$t0
632 addc $t3,$Bhi,$Bhi
633 ldw `10*4`($ctx),$t3
634 add $a0,$Clo,$Clo
635 ldw `11*4`($ctx),$t2
636 addc $a1,$Chi,$Chi
637 ldw `12*4`($ctx),$a1
638 add $a2,$Dlo,$Dlo
639 ldw `13*4`($ctx),$a0
640 addc $a3,$Dhi,$Dhi
641 ldw `14*4`($ctx),$a3
642 add $t0,$Elo,$Elo
643 ldw `15*4`($ctx),$a2
644 addc $t1,$Ehi,$Ehi
645 stw $Ahi,`0*4`($ctx)
646 add $t2,$Flo,$Flo
647 stw $Alo,`1*4`($ctx)
648 addc $t3,$Fhi,$Fhi
649 stw $Bhi,`2*4`($ctx)
650 add $a0,$Glo,$Glo
651 stw $Blo,`3*4`($ctx)
652 addc $a1,$Ghi,$Ghi
653 stw $Chi,`4*4`($ctx)
654 add $a2,$Hlo,$Hlo
655 stw $Clo,`5*4`($ctx)
656 addc $a3,$Hhi,$Hhi
657 stw $Dhi,`6*4`($ctx)
658 ldo `16*$SZ`($inp),$inp ; advance $inp
659 stw $Dlo,`7*4`($ctx)
660 stw $Ehi,`8*4`($ctx)
661 stw $Elo,`9*4`($ctx)
662 stw $Fhi,`10*4`($ctx)
663 stw $Flo,`11*4`($ctx)
664 stw $Ghi,`12*4`($ctx)
665 stw $Glo,`13*4`($ctx)
666 stw $Hhi,`14*4`($ctx)
667 comb,= $inp,$num,L\$done
668 stw $Hlo,`15*4`($ctx)
669 b L\$oop_pa1
670 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
671L\$done
672___
673}}
674$code.=<<___;
675 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
676 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
677 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
678 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
679 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
680 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
681 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
682 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
683 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
684 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
685 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
686 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
687 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
688 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
689 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
690 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
691 bv (%r2)
692 .EXIT
693 $POPMB -$FRAME(%sp),%r3
694 .PROCEND
695
696 .data
697 .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
698___
699
700# Explicitly encode PA-RISC 2.0 instructions used in this module, so
701# that it can be compiled with .LEVEL 1.0. It should be noted that I
702# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
703# directive...
704
# Encoder for "ldd": turns "disp(%rB),%rT" operands into a literal .WORD so
# the instruction can be assembled under .LEVEL 1.0.  $completer is the text
# that followed the mnemonic (e.g. ",ma"); operands in any other form are
# returned as ordinary assembly text.
my $ldd = sub {
    my ($completer, $operands) = @_;
    my $text = "ldd$completer\t$operands";

    return "\t" . $text
        unless $operands =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/; # format 3 suffices
    my ($disp, $base, $target) = ($1, $2, $3);

    my $word = (0x14 << 26) | ($base << 21) | ($target << 16)
             | (($disp & 0x1FF8) << 1) | (($disp >> 13) & 1);
    $word |= (1 << 3) if $completer =~ /^,m/;   # any base-modifying completer
    $word |= (1 << 2) if $completer =~ /^,mb/;  # additional bit for ",mb"

    return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
};
717
# Encoder for "std": turns "%rS,disp(%rB)" operands into a literal .WORD so
# the instruction can be assembled under .LEVEL 1.0.  The completer is only
# echoed in the trailing comment; it does not influence the encoding.
# Operands in any other form are returned as ordinary assembly text.
my $std = sub {
    my ($completer, $operands) = @_;
    my $text = "std$completer\t$operands";

    return "\t" . $text
        unless $operands =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/; # format 3 suffices
    my ($source, $disp, $base) = ($1, $2, $3);

    my $word = (0x1c << 26) | ($base << 21) | ($source << 16)
             | (($disp & 0x1FF8) << 1) | (($disp >> 13) & 1);

    return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
};
728
# Encoder for "extrd": emits a literal .WORD for the two operand forms used
# in this module,
#     %rS,pos,len,%rT    (fixed position,    format 15)
#     %rS,%sar,len,%rT   (position in %sar,  format 12)
# Only the ",u" completer is in use and it is encoded implicitly.  For the
# %sar form a completer matching /,\**=/ sets an additional bit.  Operands
# in any other form are returned as ordinary assembly text.
my $extrd = sub {
    my ($completer, $operands) = @_;
    my $text = "extrd$completer\t$operands";

    if ($operands =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {   # format 15
        my ($source, $pos, $width, $target) = ($1, $2, $3, $4);
        my $len  = 32 - $width;
        my $word = (0x36 << 26) | ($source << 21) | ($target << 16);
        $word |= (($pos & 0x20) << 6) | (($pos & 0x1f) << 5);   # encode pos
        $word |= (($len & 0x20) << 7) | ($len & 0x1f);          # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
    }

    if ($operands =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {       # format 12
        my ($source, $width, $target) = ($1, $2, $3);
        my $len  = 32 - $width;
        my $word = (0x34 << 26) | ($source << 21) | ($target << 16)
                 | (2 << 11) | (1 << 9);
        $word |= (($len & 0x20) << 3) | ($len & 0x1f);          # encode len
        $word |= (1 << 13) if $completer =~ /,\**=/;
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
    }

    return "\t" . $text;
};
750
# Encoder for "shrpd": emits a literal .WORD for the two operand forms used
# in this module,
#     %rA,%rB,sa,%rT     (fixed shift amount,     format 14)
#     %rA,%rB,%sar,%rT   (shift amount in %sar,   format 11)
# The completer is only echoed in the trailing comment.  Operands in any
# other form are returned as ordinary assembly text.
my $shrpd = sub {
    my ($completer, $operands) = @_;
    my $text = "shrpd$completer\t$operands";

    if ($operands =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) { # format 14
        my ($first, $second, $amount, $target) = ($1, $2, $3, $4);
        my $cpos = 63 - $amount;
        my $word = (0x34 << 26) | ($second << 21) | ($first << 16)
                 | (1 << 10) | $target;
        $word |= (($cpos & 0x20) << 6) | (($cpos & 0x1f) << 5); # encode sa
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
    }

    if ($operands =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {     # format 11
        my ($first, $second, $target) = ($1, $2, $3);
        my $word = (0x34 << 26) | ($second << 21) | ($first << 16)
                 | (1 << 9) | $target;
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
    }

    return "\t" . $text;
};
767
# assemble($mnemonic, $mod, $args)
#
# Returns one line of assembly text for the given instruction.
#   $mnemonic  instruction name without completers, e.g. "ldd"
#   $mod       completer text that followed the mnemonic, e.g. ",ma" (may be "")
#   $args      operand string
#
# The PA-RISC 2.0 instructions that have a hand-written encoder ($ldd,
# $std, $extrd, $shrpd) are delegated to it; everything else is returned
# as plain "\tmnemonic<mod>\targs".
#
# The encoder is found through an explicit table instead of string-eval'ing
# "\$$mnemonic": text coming from the assembly source is never executed as
# Perl, and only the four intended encoders can ever be selected.
sub assemble {
    my ($mnemonic, $mod, $args) = @_;

    my %encoder_for = (
        ldd   => $ldd,
        std   => $std,
        extrd => $extrd,
        shrpd => $shrpd,
    );
    my $encoder = $encoder_for{$mnemonic};

    return ref($encoder) eq 'CODE' ? $encoder->($mod, $args)
                                   : "\t$mnemonic$mod\t$args";
}
774
# Post-process the accumulated template line by line and print the result.
# For every line:
#   1. each `...` span is evaluated as Perl and replaced by its value;
#   2. at most one rewrite of an out-of-range shd or of a made-up
#      instruction is applied (the substitutions are chained with "or",
#      so the first one that fires wins);
#   3. in 32-bit builds the instruction is passed through assemble(), which
#      hand-encodes the PA-RISC 2.0 instructions, and "cmpb,*" is spelled
#      "comb,"; in 64-bit builds "bv" becomes "bve".
foreach (split("\n",$code)) {
    s/\`([^\`]*)\`/eval $1/ge;

    # shd amounts above 31 are expressed by swapping the register pair and
    # subtracting 32.  The captured register names start with '%', so
    # "%$1" forms "%%r.." which sprintf prints as "%r..".
    s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
        $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)    # rotation for >=32
        :       sprintf("shd\t%$1,%$2,%d",$3)/e         or
    # translate made up instructions: _ror, _shr, _align, _shl
    s/_ror(\s+)(%r[0-9]+),/
        ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e         or

    s/_shr(\s+%r[0-9]+),([0-9]+),/
        $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
        :        sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e    or

    s/_align(\s+%r[0-9]+,%r[0-9]+),/
        ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e        or

    s/_shl(\s+%r[0-9]+),([0-9]+),/
        $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
        :            sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;

    s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);

    s/cmpb,\*/comb,/ if ($SIZE_T==4);

    s/\bbv\b/bve/ if ($SIZE_T==8);

    print $_,"\n";
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failing close must abort rather than leave a truncated file.
close STDOUT or die "error closing STDOUT: $!";