summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/modes/asm/ghash-parisc.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/modes/asm/ghash-parisc.pl')
-rw-r--r--src/lib/libcrypto/modes/asm/ghash-parisc.pl741
1 files changed, 0 insertions, 741 deletions
diff --git a/src/lib/libcrypto/modes/asm/ghash-parisc.pl b/src/lib/libcrypto/modes/asm/ghash-parisc.pl
deleted file mode 100644
index 965802d3fa..0000000000
--- a/src/lib/libcrypto/modes/asm/ghash-parisc.pl
+++ /dev/null
@@ -1,741 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# April 2010
11#
12# The module implements "4-bit" GCM GHASH function and underlying
13# single multiplication operation in GF(2^128). "4-bit" means that it
14# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
15# it processes one byte in 19.6 cycles, which is more than twice as
16# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
17# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
18# processed byte. This is ~2.2x faster than 64-bit code generated by
19# vendor compiler (which used to be very hard to beat:-).
20#
21# Special thanks to polarhome.com for providing HP-UX account.
22
# Command line: <flavour> <output-file>.  A flavour containing "64" selects
# the 64-bit build, anything else the 32-bit one.
$flavour = shift;
$output  = shift;

# Send the generated assembly to the requested file.  Without an output
# argument the listing stays on the inherited STDOUT; a file that cannot be
# created is fatal instead of being silently ignored.
if (defined $output && length $output) {
    open STDOUT, '>', $output
        or die "can't open $output: $!";
}
26
# Flavour-dependent build parameters, in this order: .LEVEL value, pointer
# size in bytes, frame-marker size, offset below %sp at which the return
# pointer is stored, store / store-and-modify mnemonics, load /
# load-and-modify mnemonics, and the ENTRY_GR value given to .CALLINFO.
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP,
 $PUSH,  $PUSHMA, $POP,          $POPMB,    $NREGS) =
    ($flavour =~ /64/)
        ? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb", 6)
        : ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm",   11);
        # the 32-bit level once carried a trailing "\n\t.ALLOW\t2.0"

# Stack frame: ten pointer-sized register save slots plus the frame marker
# [+ argument transfer].
$FRAME = $FRAME_MARKER + 10*$SIZE_T;
51
################# volatile registers
# argument block
($Xi, $Htbl, $inp, $len) = ("%r26", "%r25", "%r24", "%r23");
# variables; $Hhh shares the register that carries $Htbl
$Hhh = $Htbl;
($Hll, $Zhh, $Zll, $cnt)     = ("%r22", "%r21", "%r20", "%r19");
($rem_4bit, $rem, $mask0xf0) = ("%r28", "%r29", "%r31");

################# preserved registers
($Thh, $Tll, $nlo, $nhi, $byte) = ("%r1", "%r2", "%r3", "%r4", "%r5");
# only the 32-bit flavour names the extra "hl"/"lh" word registers
($Zhl, $Zlh, $Hhl, $Hlh, $Thl, $Tlh) =
    ("%r6", "%r7", "%r8", "%r9", "%r10", "%r11") if ($SIZE_T==4);
$rem2 = "%r6";    # used in PA-RISC 2.0 code
81
# gcm_gmult_4bit entry: assembler preamble, export/alignment directives and
# the prologue that stores %r2 and %r3-%r6 into the new stack frame.
82$code.=<<___;
83 .LEVEL $LEVEL
84#if 0
85 .SPACE \$TEXT\$
86 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
87#else
88 .text
89#endif
90
91 .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
92 .ALIGN 64
93gcm_gmult_4bit
94 .PROC
95 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
96 .ENTRY
97 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
98 $PUSHMA %r3,$FRAME(%sp)
99 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
100 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
101 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
102___
# 32-bit flavour only: also store %r7-%r11.
103$code.=<<___ if ($SIZE_T==4);
104 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
105 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
106 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
107 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
108 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
109___
# Position-independent lookup of the L$rem_4bit table: the value "blr" leaves
# in $rem_4bit has its low two bits cleared and the label distance
# L$rem_4bit-L$pic_gmult added.  Also turns $len into $inp+$len and loads the
# 0xf0 nibble mask.
110$code.=<<___;
111 blr %r0,$rem_4bit
112 ldi 3,$rem
113L\$pic_gmult
114 andcm $rem_4bit,$rem,$rem_4bit
115 addl $inp,$len,$len
116 ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
117 ldi 0xf0,$mask0xf0
118___
# 32-bit flavour only: branch to the L$parisc1_gmult code path (the extrd line
# is annotated as executing on PA-RISC 1.0).  The #ifndef opened here is closed
# by the #endif emitted right after the L$parisc1_gmult label, so when
# __OpenBSD__ is defined everything in between is preprocessed away.
119$code.=<<___ if ($SIZE_T==4);
120#ifndef __OpenBSD__
121 ldi 31,$rem
122 mtctl $rem,%cr11
123 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
124 b L\$parisc1_gmult
125 nop
126___
127
# PA-RISC 2.0 body of gcm_gmult_4bit (emitted for both flavours).  Xi is read
# one byte at a time, starting at byte 15 and then via $cnt counting down from
# 13; each byte is split into two nibble-derived offsets ($nlo, $nhi) that
# index the Htbl halves at $Hll/$Hhh, with $rem indexing L$rem_4bit.  The
# 128-bit result is written back to Xi with two std instructions.
128$code.=<<___;
129 ldb 15($Xi),$nlo
130 ldo 8($Htbl),$Hll
131
132 and $mask0xf0,$nlo,$nhi
133 depd,z $nlo,59,4,$nlo
134
135 ldd $nlo($Hll),$Zll
136 ldd $nlo($Hhh),$Zhh
137
138 depd,z $Zll,60,4,$rem
139 shrpd $Zhh,$Zll,4,$Zll
140 extrd,u $Zhh,59,60,$Zhh
141 ldb 14($Xi),$nlo
142
143 ldd $nhi($Hll),$Tll
144 ldd $nhi($Hhh),$Thh
145 and $mask0xf0,$nlo,$nhi
146 depd,z $nlo,59,4,$nlo
147
148 xor $Tll,$Zll,$Zll
149 xor $Thh,$Zhh,$Zhh
150 ldd $rem($rem_4bit),$rem
151 b L\$oop_gmult_pa2
152 ldi 13,$cnt
153
154 .ALIGN 8
155L\$oop_gmult_pa2
156 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
157 depd,z $Zll,60,4,$rem
158
159 shrpd $Zhh,$Zll,4,$Zll
160 extrd,u $Zhh,59,60,$Zhh
161 ldd $nlo($Hll),$Tll
162 ldd $nlo($Hhh),$Thh
163
164 xor $Tll,$Zll,$Zll
165 xor $Thh,$Zhh,$Zhh
166 ldd $rem($rem_4bit),$rem
167
168 xor $rem,$Zhh,$Zhh
169 depd,z $Zll,60,4,$rem
170 ldbx $cnt($Xi),$nlo
171
172 shrpd $Zhh,$Zll,4,$Zll
173 extrd,u $Zhh,59,60,$Zhh
174 ldd $nhi($Hll),$Tll
175 ldd $nhi($Hhh),$Thh
176
177 and $mask0xf0,$nlo,$nhi
178 depd,z $nlo,59,4,$nlo
179 ldd $rem($rem_4bit),$rem
180
181 xor $Tll,$Zll,$Zll
182 addib,uv -1,$cnt,L\$oop_gmult_pa2
183 xor $Thh,$Zhh,$Zhh
184
185 xor $rem,$Zhh,$Zhh
186 depd,z $Zll,60,4,$rem
187
188 shrpd $Zhh,$Zll,4,$Zll
189 extrd,u $Zhh,59,60,$Zhh
190 ldd $nlo($Hll),$Tll
191 ldd $nlo($Hhh),$Thh
192
193 xor $Tll,$Zll,$Zll
194 xor $Thh,$Zhh,$Zhh
195 ldd $rem($rem_4bit),$rem
196
197 xor $rem,$Zhh,$Zhh
198 depd,z $Zll,60,4,$rem
199
200 shrpd $Zhh,$Zll,4,$Zll
201 extrd,u $Zhh,59,60,$Zhh
202 ldd $nhi($Hll),$Tll
203 ldd $nhi($Hhh),$Thh
204
205 xor $Tll,$Zll,$Zll
206 xor $Thh,$Zhh,$Zhh
207 ldd $rem($rem_4bit),$rem
208
209 xor $rem,$Zhh,$Zhh
210 std $Zll,8($Xi)
211 std $Zhh,0($Xi)
212___
213
# 32-bit flavour only.  First the PA-RISC 2.0 path above is finished with a
# branch to L$done_gmult; then comes the L$parisc1_gmult path (and the #endif
# closing the earlier "#ifndef __OpenBSD__").  This path keeps the 128-bit
# value in four 32-bit registers ($Zhh,$Zhl,$Zlh,$Zll), indexes Htbl at byte
# offsets 0/4/8/12 and stores the result to Xi with four stw instructions.
214$code.=<<___ if ($SIZE_T==4);
215 b L\$done_gmult
216 nop
217
218L\$parisc1_gmult
219#endif
220 ldb 15($Xi),$nlo
221 ldo 12($Htbl),$Hll
222 ldo 8($Htbl),$Hlh
223 ldo 4($Htbl),$Hhl
224
225 and $mask0xf0,$nlo,$nhi
226 zdep $nlo,27,4,$nlo
227
228 ldwx $nlo($Hll),$Zll
229 ldwx $nlo($Hlh),$Zlh
230 ldwx $nlo($Hhl),$Zhl
231 ldwx $nlo($Hhh),$Zhh
232 zdep $Zll,28,4,$rem
233 ldb 14($Xi),$nlo
234 ldwx $rem($rem_4bit),$rem
235 shrpw $Zlh,$Zll,4,$Zll
236 ldwx $nhi($Hll),$Tll
237 shrpw $Zhl,$Zlh,4,$Zlh
238 ldwx $nhi($Hlh),$Tlh
239 shrpw $Zhh,$Zhl,4,$Zhl
240 ldwx $nhi($Hhl),$Thl
241 extru $Zhh,27,28,$Zhh
242 ldwx $nhi($Hhh),$Thh
243 xor $rem,$Zhh,$Zhh
244 and $mask0xf0,$nlo,$nhi
245 zdep $nlo,27,4,$nlo
246
247 xor $Tll,$Zll,$Zll
248 ldwx $nlo($Hll),$Tll
249 xor $Tlh,$Zlh,$Zlh
250 ldwx $nlo($Hlh),$Tlh
251 xor $Thl,$Zhl,$Zhl
252 b L\$oop_gmult_pa1
253 ldi 13,$cnt
254
255 .ALIGN 8
256L\$oop_gmult_pa1
257 zdep $Zll,28,4,$rem
258 ldwx $nlo($Hhl),$Thl
259 xor $Thh,$Zhh,$Zhh
260 ldwx $rem($rem_4bit),$rem
261 shrpw $Zlh,$Zll,4,$Zll
262 ldwx $nlo($Hhh),$Thh
263 shrpw $Zhl,$Zlh,4,$Zlh
264 ldbx $cnt($Xi),$nlo
265 xor $Tll,$Zll,$Zll
266 ldwx $nhi($Hll),$Tll
267 shrpw $Zhh,$Zhl,4,$Zhl
268 xor $Tlh,$Zlh,$Zlh
269 ldwx $nhi($Hlh),$Tlh
270 extru $Zhh,27,28,$Zhh
271 xor $Thl,$Zhl,$Zhl
272 ldwx $nhi($Hhl),$Thl
273 xor $rem,$Zhh,$Zhh
274 zdep $Zll,28,4,$rem
275 xor $Thh,$Zhh,$Zhh
276 ldwx $nhi($Hhh),$Thh
277 shrpw $Zlh,$Zll,4,$Zll
278 ldwx $rem($rem_4bit),$rem
279 shrpw $Zhl,$Zlh,4,$Zlh
280 shrpw $Zhh,$Zhl,4,$Zhl
281 and $mask0xf0,$nlo,$nhi
282 extru $Zhh,27,28,$Zhh
283 zdep $nlo,27,4,$nlo
284 xor $Tll,$Zll,$Zll
285 ldwx $nlo($Hll),$Tll
286 xor $Tlh,$Zlh,$Zlh
287 ldwx $nlo($Hlh),$Tlh
288 xor $rem,$Zhh,$Zhh
289 addib,uv -1,$cnt,L\$oop_gmult_pa1
290 xor $Thl,$Zhl,$Zhl
291
292 zdep $Zll,28,4,$rem
293 ldwx $nlo($Hhl),$Thl
294 xor $Thh,$Zhh,$Zhh
295 ldwx $rem($rem_4bit),$rem
296 shrpw $Zlh,$Zll,4,$Zll
297 ldwx $nlo($Hhh),$Thh
298 shrpw $Zhl,$Zlh,4,$Zlh
299 xor $Tll,$Zll,$Zll
300 ldwx $nhi($Hll),$Tll
301 shrpw $Zhh,$Zhl,4,$Zhl
302 xor $Tlh,$Zlh,$Zlh
303 ldwx $nhi($Hlh),$Tlh
304 extru $Zhh,27,28,$Zhh
305 xor $rem,$Zhh,$Zhh
306 xor $Thl,$Zhl,$Zhl
307 ldwx $nhi($Hhl),$Thl
308 xor $Thh,$Zhh,$Zhh
309 ldwx $nhi($Hhh),$Thh
310 zdep $Zll,28,4,$rem
311 ldwx $rem($rem_4bit),$rem
312 shrpw $Zlh,$Zll,4,$Zll
313 shrpw $Zhl,$Zlh,4,$Zlh
314 shrpw $Zhh,$Zhl,4,$Zhl
315 extru $Zhh,27,28,$Zhh
316 xor $Tll,$Zll,$Zll
317 xor $Tlh,$Zlh,$Zlh
318 xor $rem,$Zhh,$Zhh
319 stw $Zll,12($Xi)
320 xor $Thl,$Zhl,$Zhl
321 stw $Zlh,8($Xi)
322 xor $Thh,$Zhh,$Zhh
323 stw $Zhl,4($Xi)
324 stw $Zhh,0($Xi)
325___
# Epilogue of gcm_gmult_4bit: reload the return pointer and %r4-%r6.
$code.=<<___;
L\$done_gmult
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
# 32-bit flavour only: also reload %r7-%r11.
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# Return from gcm_gmult_4bit (reloading %r3 and releasing the frame), then
# open gcm_ghash_4bit with the same prologue.  ENTRY_GR is taken from $NREGS,
# exactly as in gcm_gmult_4bit, because both prologues store the same set of
# registers; the value is unchanged (11) for the 32-bit flavour.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.EXPORT	gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
	.ALIGN	64
gcm_ghash_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
# 32-bit flavour only: also store %r7-%r11.
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Position-independent lookup of the L$rem_4bit table: the value "blr" leaves
# in $rem_4bit has its low two bits cleared and the label distance
# L$rem_4bit-L$pic_ghash added.  $len becomes $inp+$len (the end-of-input
# address the outer loops compare against) and the 0xf0 nibble mask is loaded.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_ghash
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
# 32-bit flavour only: branch to the L$parisc1_ghash code path (the extrd line
# is annotated as executing on PA-RISC 1.0).  The #ifndef opened here is closed
# by the #endif emitted right after the L$parisc1_ghash label.
$code.=<<___ if ($SIZE_T==4);
#ifndef __OpenBSD__
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*=	$rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_ghash
	nop
___
382
# PA-RISC 2.0 body of gcm_ghash_4bit (emitted for both flavours).  For every
# 16-byte block at $inp, each Xi byte is XORed with the matching input byte
# before being split into the $nlo/$nhi table offsets.  After the two std
# stores $inp advances by 16 and the outer loop repeats until $inp equals
# $len; the copy after the compare reloads $nlo from $Zll for the next block.
383$code.=<<___;
384 ldb 15($Xi),$nlo
385 ldo 8($Htbl),$Hll
386
387L\$outer_ghash_pa2
388 ldb 15($inp),$nhi
389 xor $nhi,$nlo,$nlo
390 and $mask0xf0,$nlo,$nhi
391 depd,z $nlo,59,4,$nlo
392
393 ldd $nlo($Hll),$Zll
394 ldd $nlo($Hhh),$Zhh
395
396 depd,z $Zll,60,4,$rem
397 shrpd $Zhh,$Zll,4,$Zll
398 extrd,u $Zhh,59,60,$Zhh
399 ldb 14($Xi),$nlo
400 ldb 14($inp),$byte
401
402 ldd $nhi($Hll),$Tll
403 ldd $nhi($Hhh),$Thh
404 xor $byte,$nlo,$nlo
405 and $mask0xf0,$nlo,$nhi
406 depd,z $nlo,59,4,$nlo
407
408 xor $Tll,$Zll,$Zll
409 xor $Thh,$Zhh,$Zhh
410 ldd $rem($rem_4bit),$rem
411 b L\$oop_ghash_pa2
412 ldi 13,$cnt
413
414 .ALIGN 8
415L\$oop_ghash_pa2
416 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
417 depd,z $Zll,60,4,$rem2
418
419 shrpd $Zhh,$Zll,4,$Zll
420 extrd,u $Zhh,59,60,$Zhh
421 ldd $nlo($Hll),$Tll
422 ldd $nlo($Hhh),$Thh
423
424 xor $Tll,$Zll,$Zll
425 xor $Thh,$Zhh,$Zhh
426 ldbx $cnt($Xi),$nlo
427 ldbx $cnt($inp),$byte
428
429 depd,z $Zll,60,4,$rem
430 shrpd $Zhh,$Zll,4,$Zll
431 ldd $rem2($rem_4bit),$rem2
432
433 xor $rem2,$Zhh,$Zhh
434 xor $byte,$nlo,$nlo
435 ldd $nhi($Hll),$Tll
436 ldd $nhi($Hhh),$Thh
437
438 and $mask0xf0,$nlo,$nhi
439 depd,z $nlo,59,4,$nlo
440
441 extrd,u $Zhh,59,60,$Zhh
442 xor $Tll,$Zll,$Zll
443
444 ldd $rem($rem_4bit),$rem
445 addib,uv -1,$cnt,L\$oop_ghash_pa2
446 xor $Thh,$Zhh,$Zhh
447
448 xor $rem,$Zhh,$Zhh
449 depd,z $Zll,60,4,$rem2
450
451 shrpd $Zhh,$Zll,4,$Zll
452 extrd,u $Zhh,59,60,$Zhh
453 ldd $nlo($Hll),$Tll
454 ldd $nlo($Hhh),$Thh
455
456 xor $Tll,$Zll,$Zll
457 xor $Thh,$Zhh,$Zhh
458
459 depd,z $Zll,60,4,$rem
460 shrpd $Zhh,$Zll,4,$Zll
461 ldd $rem2($rem_4bit),$rem2
462
463 xor $rem2,$Zhh,$Zhh
464 ldd $nhi($Hll),$Tll
465 ldd $nhi($Hhh),$Thh
466
467 extrd,u $Zhh,59,60,$Zhh
468 xor $Tll,$Zll,$Zll
469 xor $Thh,$Zhh,$Zhh
470 ldd $rem($rem_4bit),$rem
471
472 xor $rem,$Zhh,$Zhh
473 std $Zll,8($Xi)
474 ldo 16($inp),$inp
475 std $Zhh,0($Xi)
476 cmpb,*<> $inp,$len,L\$outer_ghash_pa2
477 copy $Zll,$nlo
478___
479
# 32-bit flavour only.  First the PA-RISC 2.0 path above is finished with a
# branch to L$done_ghash; then comes the L$parisc1_ghash path (and the #endif
# closing the earlier "#ifndef __OpenBSD__").  This path keeps the 128-bit
# value in four 32-bit registers, XORs each Xi byte with the matching input
# byte, stores the result with four stw instructions, advances $inp by 16 and
# repeats the outer loop until $inp equals $len.
480$code.=<<___ if ($SIZE_T==4);
481 b L\$done_ghash
482 nop
483
484L\$parisc1_ghash
485#endif
486 ldb 15($Xi),$nlo
487 ldo 12($Htbl),$Hll
488 ldo 8($Htbl),$Hlh
489 ldo 4($Htbl),$Hhl
490
491L\$outer_ghash_pa1
492 ldb 15($inp),$byte
493 xor $byte,$nlo,$nlo
494 and $mask0xf0,$nlo,$nhi
495 zdep $nlo,27,4,$nlo
496
497 ldwx $nlo($Hll),$Zll
498 ldwx $nlo($Hlh),$Zlh
499 ldwx $nlo($Hhl),$Zhl
500 ldwx $nlo($Hhh),$Zhh
501 zdep $Zll,28,4,$rem
502 ldb 14($Xi),$nlo
503 ldb 14($inp),$byte
504 ldwx $rem($rem_4bit),$rem
505 shrpw $Zlh,$Zll,4,$Zll
506 ldwx $nhi($Hll),$Tll
507 shrpw $Zhl,$Zlh,4,$Zlh
508 ldwx $nhi($Hlh),$Tlh
509 shrpw $Zhh,$Zhl,4,$Zhl
510 ldwx $nhi($Hhl),$Thl
511 extru $Zhh,27,28,$Zhh
512 ldwx $nhi($Hhh),$Thh
513 xor $byte,$nlo,$nlo
514 xor $rem,$Zhh,$Zhh
515 and $mask0xf0,$nlo,$nhi
516 zdep $nlo,27,4,$nlo
517
518 xor $Tll,$Zll,$Zll
519 ldwx $nlo($Hll),$Tll
520 xor $Tlh,$Zlh,$Zlh
521 ldwx $nlo($Hlh),$Tlh
522 xor $Thl,$Zhl,$Zhl
523 b L\$oop_ghash_pa1
524 ldi 13,$cnt
525
526 .ALIGN 8
527L\$oop_ghash_pa1
528 zdep $Zll,28,4,$rem
529 ldwx $nlo($Hhl),$Thl
530 xor $Thh,$Zhh,$Zhh
531 ldwx $rem($rem_4bit),$rem
532 shrpw $Zlh,$Zll,4,$Zll
533 ldwx $nlo($Hhh),$Thh
534 shrpw $Zhl,$Zlh,4,$Zlh
535 ldbx $cnt($Xi),$nlo
536 xor $Tll,$Zll,$Zll
537 ldwx $nhi($Hll),$Tll
538 shrpw $Zhh,$Zhl,4,$Zhl
539 ldbx $cnt($inp),$byte
540 xor $Tlh,$Zlh,$Zlh
541 ldwx $nhi($Hlh),$Tlh
542 extru $Zhh,27,28,$Zhh
543 xor $Thl,$Zhl,$Zhl
544 ldwx $nhi($Hhl),$Thl
545 xor $rem,$Zhh,$Zhh
546 zdep $Zll,28,4,$rem
547 xor $Thh,$Zhh,$Zhh
548 ldwx $nhi($Hhh),$Thh
549 shrpw $Zlh,$Zll,4,$Zll
550 ldwx $rem($rem_4bit),$rem
551 shrpw $Zhl,$Zlh,4,$Zlh
552 xor $byte,$nlo,$nlo
553 shrpw $Zhh,$Zhl,4,$Zhl
554 and $mask0xf0,$nlo,$nhi
555 extru $Zhh,27,28,$Zhh
556 zdep $nlo,27,4,$nlo
557 xor $Tll,$Zll,$Zll
558 ldwx $nlo($Hll),$Tll
559 xor $Tlh,$Zlh,$Zlh
560 ldwx $nlo($Hlh),$Tlh
561 xor $rem,$Zhh,$Zhh
562 addib,uv -1,$cnt,L\$oop_ghash_pa1
563 xor $Thl,$Zhl,$Zhl
564
565 zdep $Zll,28,4,$rem
566 ldwx $nlo($Hhl),$Thl
567 xor $Thh,$Zhh,$Zhh
568 ldwx $rem($rem_4bit),$rem
569 shrpw $Zlh,$Zll,4,$Zll
570 ldwx $nlo($Hhh),$Thh
571 shrpw $Zhl,$Zlh,4,$Zlh
572 xor $Tll,$Zll,$Zll
573 ldwx $nhi($Hll),$Tll
574 shrpw $Zhh,$Zhl,4,$Zhl
575 xor $Tlh,$Zlh,$Zlh
576 ldwx $nhi($Hlh),$Tlh
577 extru $Zhh,27,28,$Zhh
578 xor $rem,$Zhh,$Zhh
579 xor $Thl,$Zhl,$Zhl
580 ldwx $nhi($Hhl),$Thl
581 xor $Thh,$Zhh,$Zhh
582 ldwx $nhi($Hhh),$Thh
583 zdep $Zll,28,4,$rem
584 ldwx $rem($rem_4bit),$rem
585 shrpw $Zlh,$Zll,4,$Zll
586 shrpw $Zhl,$Zlh,4,$Zlh
587 shrpw $Zhh,$Zhl,4,$Zhl
588 extru $Zhh,27,28,$Zhh
589 xor $Tll,$Zll,$Zll
590 xor $Tlh,$Zlh,$Zlh
591 xor $rem,$Zhh,$Zhh
592 stw $Zll,12($Xi)
593 xor $Thl,$Zhl,$Zhl
594 stw $Zlh,8($Xi)
595 xor $Thh,$Zhh,$Zhh
596 stw $Zhl,4($Xi)
597 ldo 16($inp),$inp
598 stw $Zhh,0($Xi)
599 comb,<> $inp,$len,L\$outer_ghash_pa1
600 copy $Zll,$nlo
601___
# Epilogue of gcm_ghash_4bit: reload the return pointer and %r4-%r6.
$code.=<<___;
L\$done_ghash
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
# 32-bit flavour only: also reload %r7-%r11.
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# Return from gcm_ghash_4bit, followed by the shared 16-entry L$rem_4bit
# table (each entry is a 16-bit constant shifted into the top half of the
# first of two 32-bit words, 128 bytes in total) and the identification
# string placed in the data section.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.ALIGN	64
L\$rem_4bit
	.WORD	`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
	.WORD	`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
	.WORD	`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
	.WORD	`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0

	.data
	.STRINGZ	"GHASH for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
	.ALIGN	64
___
633
634# Explicitly encode PA-RISC 2.0 instructions used in this module, so
635# that it can be compiled with .LEVEL 1.0. It should be noted that I
636# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
637# directive...
638
# Hand-encode an "ldd" instruction as a .WORD directive so that the module
# can be assembled at .LEVEL 1.0.
#   $mod  - completer text that followed the mnemonic ("" or e.g. ",mb")
#   $args - operand string
# Returns the replacement text; operand forms matching neither pattern are
# passed through as an ordinary "ldd" line.
my $ldd = sub {
    my ($mod,$args) = @_;
    my $orig = "ldd$mod\t$args";
    my ($opcode,@f);

    if (@f = $args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {        # format 4
        my ($index,$base,$target) = @f;
        $opcode = (0x03<<26)|($base<<21)|($index<<16)|(3<<6)|$target;
    }
    elsif (@f = $args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {   # format 5
        my ($disp,$base,$target) = @f;
        $opcode  = (0x03<<26)|($base<<21)|(1<<12)|(3<<6)|$target;
        $opcode |= (($disp&0xF)<<17)|(($disp&0x10)<<12);              # encode offset
        $opcode |= (1<<5)  if ($mod =~ /^,m/);
        $opcode |= (1<<13) if ($mod =~ /^,mb/);
    }

    return defined($opcode)
        ? sprintf("\t.WORD\t0x%08x\t; %s",$opcode,$orig)
        : "\t".$orig;
};
656
# Hand-encode an "std" instruction as a .WORD directive.
#   $mod  - completer text that followed the mnemonic (only echoed in the
#           trailing comment, never encoded)
#   $args - operand string, expected as "%rS,disp(%rB)"
# Returns the replacement text; any other operand form is passed through as
# an ordinary "std" line.
my $std = sub {
    my ($mod,$args) = @_;
    my $orig = "std$mod\t$args";

    my @f = $args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/;
    return "\t".$orig unless @f;

    # format 3 suffices
    my ($src,$disp,$base) = @f;
    my $opcode = (0x1c<<26)|($base<<21)|($src<<16)
               | (($disp&0x1FF8)<<1)|(($disp>>13)&1);
    return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
};
667
# Hand-encode an "extrd" instruction as a .WORD directive.
#   $mod  - completer text that followed the mnemonic; only a ",=" / ",*="
#           condition is inspected, and only in the %sar form
#   $args - operand string: "%rS,pos,len,%rT" or "%rS,%sar,len,%rT"
# Returns the replacement text; other operand forms are passed through as an
# ordinary "extrd" line.
my $extrd = sub {
    my ($mod,$args) = @_;
    my $orig = "extrd$mod\t$args";
    my ($opcode,@f);

    # I only have ",u" completer, it's implicitly encoded...
    if (@f = $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {   # format 15
        my ($src,$pos,$width,$target) = @f;
        my $len = 32-$width;
        $opcode  = (0x36<<26)|($src<<21)|($target<<16);
        $opcode |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);                 # encode pos
        $opcode |= (($len&0x20)<<7)|($len&0x1f);                      # encode len
    }
    elsif (@f = $args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {     # format 12
        my ($src,$width,$target) = @f;
        my $len = 32-$width;
        $opcode  = (0x34<<26)|($src<<21)|($target<<16)|(2<<11)|(1<<9);
        $opcode |= (($len&0x20)<<3)|($len&0x1f);                      # encode len
        $opcode |= (1<<13) if ($mod =~ /,\**=/);
    }

    return defined($opcode)
        ? sprintf("\t.WORD\t0x%08x\t; %s",$opcode,$orig)
        : "\t".$orig;
};
689
# Hand-encode a "shrpd" instruction as a .WORD directive.
#   $mod  - completer text that followed the mnemonic (only echoed in the
#           trailing comment)
#   $args - operand string: "%rA,%rB,sa,%rT" or "%rA,%rB,%sar,%rT"
# Returns the replacement text; other operand forms are passed through as an
# ordinary "shrpd" line.
my $shrpd = sub {
    my ($mod,$args) = @_;
    my $orig = "shrpd$mod\t$args";
    my ($opcode,@f);

    if (@f = $args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) { # format 14
        my ($hi,$lo,$shift,$target) = @f;
        my $cpos = 63-$shift;
        $opcode  = (0x34<<26)|($lo<<21)|($hi<<16)|(1<<10)|$target;
        $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);               # encode sa
    }
    elsif (@f = $args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {  # format 11
        my ($hi,$lo,$target) = @f;
        $opcode = (0x34<<26)|($lo<<21)|($hi<<16)|(1<<9)|$target;
    }

    return defined($opcode)
        ? sprintf("\t.WORD\t0x%08x\t; %s",$opcode,$orig)
        : "\t".$orig;
};
706
# Hand-encode a "depd" instruction as a .WORD directive.
#   $mod  - completer text that followed the mnemonic (only echoed in the
#           trailing comment)
#   $args - operand string, expected as "%rS,pos,len,%rT"
# Returns the replacement text; other operand forms are passed through as an
# ordinary "depd" line.
my $depd = sub {
    my ($mod,$args) = @_;
    my $orig = "depd$mod\t$args";

    # I only have ",z" completer, it's implicitly encoded...
    my @f = $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/;      # format 16
    return "\t".$orig unless @f;

    my ($src,$pos,$width,$target) = @f;
    my $cpos = 63-$pos;
    my $len  = 32-$width;
    my $opcode = (0x3c<<26)|($target<<21)|($src<<16);
    $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);                   # encode pos
    $opcode |= (($len&0x20)<<7)|($len&0x1f);                          # encode len
    return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
};
722
# Translate one instruction of the template.
#   $mnemonic - lower-case instruction name
#   $mod      - completer text glued to the mnemonic (may be empty)
#   $args     - operand string
# If a variable named after the mnemonic holds a code reference (the
# hand-encoders $ldd, $std, ...), that encoder produces the line; otherwise
# the instruction is re-emitted unchanged in tab-separated form.
sub assemble {
    my ($mnemonic,$mod,$args)=@_;

    # look the encoder up by name
    my $encoder = eval("\$$mnemonic");

    return $encoder->($mod,$args) if (ref($encoder) eq 'CODE');
    return "\t$mnemonic$mod\t$args";
}
729
# Post-process the accumulated template line by line and print the result.
foreach my $line (split("\n",$code)) {
    # evaluate the `...` expressions embedded in the template
    $line =~ s/\`([^\`]*)\`/eval $1/ge;

    if ($SIZE_T==4) {
        # 32-bit flavour: hand-encode instructions that have an encoder and
        # rewrite 64-bit-only mnemonics/conditions ("cmpb,*" -> "comb,",
        # ",*" -> ",")
        $line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        $line =~ s/cmpb,\*/comb,/;
        $line =~ s/,\*/,/;
    }
    # 64-bit flavour: use "bve" in place of "bv"
    $line =~ s/\bbv\b/bve/ if ($SIZE_T==8);

    print $line,"\n";
}

# Buffered write errors (for example a full disk) only surface when the
# handle is closed, so a failed close must not pass silently.
close STDOUT or die "error closing STDOUT: $!";